commit 615f69bcb522cc56cfe10ac7bab951ee9f8715a9 Author: Jérôme Schneider Date: Fri Oct 31 16:50:38 2014 +0100 Initial import from tarball diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt new file mode 100644 index 0000000..edf2f12 --- /dev/null +++ b/CONTRIBUTORS.txt @@ -0,0 +1,162 @@ +Every contribution to Celery is as important to us, +as every coin in the money bin is to Scrooge McDuck. + +The first commit to the Celery codebase was made on +Fri Apr 24 13:30:00 2009 +0200, and has since +then been improved by many contributors. + +Everyone who have ever contributed to Celery should be in +this list, but in a recent policy change it has been decided +that everyone must add themselves here, and not be added +by others, so it's currently incomplete waiting for everyone +to add their names. + +The full list of authors can be found in docs/AUTHORS.txt. + +-- + +Contributor offers to license certain software (a “Contribution” or multiple +“Contributions”) to Celery, and Celery agrees to accept said Contributions, +under the terms of the BSD open source license. +Contributor understands and agrees that Celery shall have the irrevocable and perpetual right to make +and distribute copies of any Contribution, as well as to create and distribute collective works and +derivative works of any Contribution, under the BSD License. + +Contributors +------------ + +Ask Solem, 2012/06/07 +Sean O'Connor, 2012/06/07 +Patrick Altman, 2012/06/07 +Chris St. Pierre, 2012/06/07 +Jeff Terrace, 2012/06/07 +Mark Lavin, 2012/06/07 +Jesper Noehr, 2012/06/07 +Brad Jasper, 2012/06/07 +Juan Catalano, 2012/06/07 +Luke Zapart, 2012/06/07 +Roger Hu, 2012/06/07 +Honza Král, 2012/06/07 +Aaron Elliot Ross, 2012/06/07 +Alec Clowes, 2012/06/07 +Daniel Watkins, 2012/06/07 +Timo Sugliani, 2012/06/07 +Yury V. 
Zaytsev, 2012/06/7 +Marcin Kuźmiński, 2012/06/07 +Norman Richards, 2012/06/07 +Kevin Tran, 2012/06/07 +David Arthur, 2012/06/07 +Bryan Berg, 2012/06/07 +Mikhail Korobov, 2012/06/07 +Jerzy Kozera, 2012/06/07 +Ben Firshman, 2012/06/07 +Jannis Leidel, 2012/06/07 +Chris Rose, 2012/06/07 +Julien Poissonnier, 2012/06/07 +Łukasz Oleś, 2012/06/07 +David Strauss, 2012/06/07 +Chris Streeter, 2012/06/07 +Thomas Johansson, 2012/06/07 +Ales Zoulek, 2012/06/07 +Clay Gerrard, 2012/06/07 +Matt Williamson, 2012/06/07 +Travis Swicegood, 2012/06/07 +Jeff Balogh, 2012/06/07 +Harm Verhagen, 2012/06/07 +Wes Winham, 2012/06/07 +David Cramer, 2012/06/07 +Steeve Morin, 2012/06/07 +Mher Movsisyan, 2012/06/08 +Chris Peplin, 2012/06/07 +Florian Apolloner, 2012/06/07 +Juarez Bochi, 2012/06/07 +Christopher Angove, 2012/06/07 +Jason Pellerin, 2012/06/07 +Miguel Hernandez Martos, 2012/06/07 +Neil Chintomby, 2012/06/07 +Mauro Rocco, 2012/06/07 +Ionut Turturica, 2012/06/07 +Adriano Petrich, 2012/06/07 +Michael Elsdörfer, 2012/06/07 +Kornelijus Survila, 2012/06/07 +Stefán Kjartansson, 2012/06/07 +Keith Perkins, 2012/06/07 +Flavio Percoco, 2012/06/07 +Wes Turner, 2012/06/07 +Vitaly Babiy, 2012/06/07 +Tayfun Sen, 2012/06/08 +Gert Van Gool, 2012/06/08 +Akira Matsuzaki, 2012/06/08 +Simon Josi, 2012/06/08 +Sam Cooke, 2012/06/08 +Frederic Junod, 2012/06/08 +Roberto Gaiser, 2012/06/08 +Piotr Sikora, 2012/06/08 +Chris Adams, 2012/06/08 +Branko Čibej, 2012/06/08 +Vladimir Kryachko, 2012/06/08 +Remy Noel 2012/06/08 +Jude Nagurney, 2012/06/09 +Jonatan Heyman, 2012/06/10 +David Miller 2012/06/11 +Matthew Morrison, 2012/06/11 +Leo Dirac, 2012/06/11 +Mark Thurman, 2012/06/11 +Dimitrios Kouzis-Loukas, 2012/06/13 +Steven Skoczen, 2012/06/17 +Loren Abrams, 2012/06/19 +Eran Rundstein, 2012/06/24 +John Watson, 2012/06/27 +Matt Long, 2012/07/04 +David Markey, 2012/07/05 +Jared Biel, 2012/07/05 +Jed Smith, 2012/07/08 +Łukasz Langa, 2012/07/10 +Rinat Shigapov, 2012/07/20 +Hynek Schlawack, 2012/07/23 +Paul McMillan, 2012/07/26 +Mitar, 2012/07/28 +Adam DePue, 2012/08/22 +Thomas Meson, 2012/08/28 +Daniel Lundin, 2012/08/30 +Alexey Zatelepin, 2012/09/18 +Sundar Raman, 2012/09/24 +Henri Colas, 2012/11/16 +Thomas Grainger, 2012/11/29 +Marius Gedminas, 2012/11/29 +Christoph Krybus, 2013/01/07 +Jun Sakai, 2013/01/16 +Vlad Frolov, 2013/01/23 +Milen Pavlov, 2013/03/08 +Pär Wieslander, 2013/03/20 +Theo Spears, 2013/03/28 +Romuald Brunet, 2013/03/29 +Aaron Harnly, 2013/04/04 +Peter Brook, 2013/05/09 +Muneyuki Noguchi, 2013/04/24 +Stas Rudakou, 2013/05/29 +Dong Weiming, 2013/06/27 +Oleg Anashkin, 2013/06/27 +Ross Lawley, 2013/07/05 +Alain Masiero, 2013/08/07 +Adrien Guinet, 2013/08/14 +Christopher Lee, 2013/08/29 +Alexander Smirnov, 2013/08/30 +Matt Robenolt, 2013/08/31 +Jameel Al-Aziz, 2013/10/04 +Fazleev Maksim, 2013/10/08 +Ian A Wilson, 2013/10/18 +Daniel M Taub, 2013/10/22 +Matt Wise, 2013/11/06 +Michael Robellard, 2013/11/07 +Vsevolod Kulaga, 2013/11/16 +Ionel Cristian Mărieș, 2013/12/09 +Константин Подшумок, 2013/12/16 +Antoine Legrand, 2014/01/09 +Pepijn de Vos, 2014/01/15 +Dan McGee, 2014/01/27 +Paul Kilgo, 2014/01/28 +Martin Davidsson, 2014/02/08 +Chris Clark, 2014/02/20 +Matthew Duggan, 2014/04/10 +Brian Bouterse, 2014/04/10 diff --git a/Changelog b/Changelog new file mode 100644 index 0000000..d2d79f9 --- /dev/null +++ b/Changelog @@ -0,0 +1,1094 @@ +.. 
_changelog: + +================ + Change history +================ + +This document contains change notes for bugfix releases in the 3.1.x series +(Cipater), please see :ref:`whatsnew-3.1` for an overview of what's +new in Celery 3.1. + +.. _version-3.1.13: + +3.1.13 +====== + +Security Fixes +-------------- + +* [Security: `CELERYSA-0002`_] Insecure default umask. + + The built-in utility used to daemonize the Celery worker service sets + an insecure umask by default (umask 0). + + This means that any files or directories created by the worker will + end up having world-writable permissions. + + Special thanks to Red Hat for originally discovering and reporting the + issue! + + This version will no longer set a default umask by default, so if unset + the umask of the parent process will be used. + +.. _`CELERYSA-0002`: + http://github.com/celery/celery/tree/master/docs/sec/CELERYSA-0002.txt + +News +---- + +- **Requirements** + + - Now depends on :ref:`Kombu 3.0.21 `. + + - Now depends on :mod:`billiard` 3.3.0.18. + + +- **App**: ``backend`` argument now also sets the :setting:`CELERY_RESULT_BACKEND` + setting. + +- **Task**: ``signature_from_request`` now propagates ``reply_to`` so that + the RPC backend works with retried tasks (Issue #2113). + +- **Task**: ``retry`` will no longer attempt to requeue the task if sending + the retry message fails. + + Unrelated exceptions being raised could cause a message loop, so it was + better to remove this behavior. + +- **Beat**: Accounts for standard 1ms drift by always waking up 0.010s + earlier. + + This will adjust the latency so that the periodic tasks will not move + 1ms after every invocation. + +- Documentation fixes + + Contributed by Yuval Greenfield, Lucas Wiman, nicholsonjf + +- **Worker**: Removed an outdated assert statement that could lead to errors + being masked (Issue #2086). + + + +.. _version-3.1.12: + +3.1.12 +====== +:release-date: 2014-06-09 10:12 P.M UTC +:release-by: Ask Solem + +- **Requirements** + + Now depends on :ref:`Kombu 3.0.19 `. + +- **App**: Connections were not being closed after fork due to an error in the + after fork handler (Issue #2055). + + This could manifest itself by causing framing errors when using RabbitMQ. + (``Unexpected frame``). + +- **Django**: ``django.setup()`` was being called too late when + using Django 1.7 (Issue #1802). + +- **Django**: Fixed problems with event timezones when using Django + (``Substantial drift``). + + Celery did not take into account that Django modifies the + ``time.timeone`` attributes and friends. + +- **Canvas**: ``Signature.link`` now works when the link option is a scalar + value (Issue #2019). + +- **Prefork pool**: Fixed race conditions for when file descriptors are + removed from the event loop. + + Fix contributed by Roger Hu. + +- **Prefork pool**: Improved solution for dividing tasks between child + processes. + + This change should improve performance when there are many child + processes, and also decrease the chance that two subsequent tasks are + written to the same child process. + +- **Worker**: Now ignores unknown event types, instead of crashing. + + Fix contributed by Illes Solt. + +- **Programs**: :program:`celery worker --detach` no longer closes open file + descriptors when :envvar:`C_FAKEFORK` is used so that the workers output + can be seen. + +- **Programs**: The default working directory for :program:`celery worker + --detach` is now the current working directory, not ``/``. 
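The ``backend`` argument entry under 3.1.13 above can be pictured with a minimal sketch; the broker and backend URLs are placeholders, not values taken from these notes:

.. code-block:: python

    from celery import Celery

    # Passing ``backend=`` now also populates the matching setting.
    app = Celery('proj',
                 broker='amqp://guest@localhost//',
                 backend='redis://localhost:6379/0')

    print(app.conf.CELERY_RESULT_BACKEND)  # -> 'redis://localhost:6379/0'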
+ +- **Canvas**: ``signature(s, app=app)`` did not upgrade serialized signatures + to their original class (``subtask_type``) when the ``app`` keyword argument + was used. + +- **Control**: The ``duplicate nodename`` warning emitted by control commands + now shows the duplicate node name. + +- **Tasks**: Can now call ``ResultSet.get()`` on a result set without members. + + Fix contributed by Alexey Kotlyarov. + +- **App**: Fixed strange traceback mangling issue for + ``app.connection_or_acquire``. + +- **Programs**: The :program:`celery multi stopwait` command is now documented + in usage. + +- **Other**: Fixed cleanup problem with ``PromiseProxy`` when an error is + raised while trying to evaluate the promise. + +- **Other**: The utility used to censor configuration values now handles + non-string keys. + + Fix contributed by Luke Pomfrey. + +- **Other**: The ``inspect conf`` command did not handle non-string keys well. + + Fix contributed by Jay Farrimond. + +- **Programs**: Fixed argument handling problem in + :program:`celery worker --detach`. + + Fix contributed by Dmitry Malinovsky. + +- **Programs**: :program:`celery worker --detach` did not forward working + directory option (Issue #2003). + +- **Programs**: :program:`celery inspect registered` no longer includes + the list of built-in tasks. + +- **Worker**: The ``requires`` attribute for boot steps were not being handled + correctly (Issue #2002). + +- **Eventlet**: The eventlet pool now supports the ``pool_grow`` and + ``pool_shrink`` remote control commands. + + Contributed by Mher Movsisyan. + +- **Eventlet**: The eventlet pool now implements statistics for + :program:``celery inspect stats``. + + Contributed by Mher Movsisyan. + +- **Documentation**: Clarified ``Task.rate_limit`` behavior. + + Contributed by Jonas Haag. + +- **Documentation**: ``AbortableTask`` examples now updated to use the new + API (Issue #1993). + +- **Documentation**: The security documentation examples used an out of date + import. + + Fix contributed by Ian Dees. + +- **Init scripts**: The CentOS init scripts did not quote + :envvar:`CELERY_CHDIR`. + + Fix contributed by ffeast. + +.. _version-3.1.11: + +3.1.11 +====== +:release-date: 2014-04-16 11:00 P.M UTC +:release-by: Ask Solem + +- **Now compatible with RabbitMQ 3.3.0** + + You need to run Celery 3.1.11 or later when using RabbitMQ 3.3, + and if you use the ``librabbitmq`` module you also have to upgrade + to librabbitmq 1.5.0: + + .. code-block:: bash + + $ pip install -U librabbitmq + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.15 `. + + - Now depends on `billiard 3.3.0.17`_. + + - Bundle ``celery[librabbitmq]`` now depends on :mod:`librabbitmq` 1.5.0. + +.. _`billiard 3.3.0.17`: + https://github.com/celery/billiard/blob/master/CHANGES.txt + +- **Tasks**: The :setting:`CELERY_DEFAULT_DELIVERY_MODE` setting was being + ignored (Issue #1953). + +- **Worker**: New :option:`--heartbeat-interval` can be used to change the + time (in seconds) between sending event heartbeats. + + Contributed by Matthew Duggan and Craig Northway. + +- **App**: Fixed memory leaks occurring when creating lots of temporary + app instances (Issue #1949). + +- **MongoDB**: SSL configuration with non-MongoDB transport breaks MongoDB + results backend (Issue #1973). + + Fix contributed by Brian Bouterse. + +- **Logging**: The color formatter accidentally modified ``record.msg`` + (Issue #1939). 
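A hedged sketch of the eventlet ``pool_grow``/``pool_shrink`` remote control support listed under 3.1.12 above; it assumes an existing app instance ``app``, and the worker node name is made up:

.. code-block:: python

    # Resize the pool of a running eventlet worker via remote control.
    app.control.pool_grow(2, destination=['worker1@example.com'])
    app.control.pool_shrink(1, destination=['worker1@example.com'])

    # The eventlet pool now also reports statistics:
    stats = app.control.inspect(['worker1@example.com']).stats()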
+ +- **Results**: Fixed problem with task trails being stored multiple times, + causing ``result.collect()`` to hang (Issue #1936, Issue #1943). + +- **Results**: ``ResultSet`` now implements a ``.backend`` attribute for + compatibility with ``AsyncResult``. + +- **Results**: ``.forget()`` now also clears the local cache. + +- **Results**: Fixed problem with multiple calls to ``result._set_cache`` + (Issue #1940). + +- **Results**: ``join_native`` populated result cache even if disabled. + +- **Results**: The YAML result serializer should now be able to handle storing + exceptions. + +- **Worker**: No longer sends task error emails for expected errors (in + ``@task(throws=(..., )))``. + +- **Canvas**: Fixed problem with exception deserialization when using + the JSON serializer (Issue #1987). + +- **Eventlet**: Fixes crash when ``celery.contrib.batches`` attempted to + cancel a non-existing timer (Issue #1984). + +- Can now import ``celery.version_info_t``, and ``celery.five`` (Issue #1968). + + +.. _version-3.1.10: + +3.1.10 +====== +:release-date: 2014-03-22 09:40 P.M UTC +:release-by: Ask Solem + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.14 `. + +- **Results**: + + Reliability improvements to the SQLAlchemy database backend. Previously the + connection from the MainProcess was improperly shared with the workers. + (Issue #1786) + +- **Redis:** Important note about events (Issue #1882). + + There is a new transport option for Redis that enables monitors + to filter out unwanted events. Enabling this option in the workers + will increase performance considerably: + + .. code-block:: python + + BROKER_TRANSPORT_OPTIONS = {'fanout_patterns': True} + + Enabling this option means that your workers will not be able to see + workers with the option disabled (or is running an older version of + Celery), so if you do enable it then make sure you do so on all + nodes. + + See :ref:`redis-caveats-fanout-patterns`. + + This will be the default in Celery 3.2. + +- **Results**: The :class:`@AsyncResult` object now keeps a local cache + of the final state of the task. + + This means that the global result cache can finally be disabled, + and you can do so by setting :setting:`CELERY_MAX_CACHED_RESULTS` to + :const:`-1`. The lifetime of the cache will then be bound to the + lifetime of the result object, which will be the default behavior + in Celery 3.2. + +- **Events**: The "Substantial drift" warning message is now logged once + per node name only (Issue #1802). + +- **Worker**: Ability to use one log file per child process when using the + prefork pool. + + This can be enabled by using the new ``%i`` and ``%I`` format specifiers + for the log file name. See :ref:`worker-files-process-index`. + +- **Redis**: New experimental chord join implementation. + + This is an optimization for chords when using the Redis result backend, + where the join operation is now considerably faster and using less + resources than the previous strategy. + + The new option can be set in the result backend URL: + + CELERY_RESULT_BACKEND = 'redis://localhost?new_join=1' + + This must be enabled manually as it's incompatible + with workers and clients not using it, so be sure to enable + the option in all clients and workers if you decide to use it. + +- **Multi**: With ``-opt:index`` (e.g. :option:`-c:1`) the index now always refers + to the position of a node in the argument list. 
+ + This means that referring to a number will work when specifying a list + of node names and not just for a number range: + + .. code-block:: bash + + celery multi start A B C D -c:1 4 -c:2-4 8 + + In this example ``1`` refers to node A (as it's the first node in the + list). + +- **Signals**: The sender argument to ``Signal.connect`` can now be a proxy + object, which means that it can be used with the task decorator + (Issue #1873). + +- **Task**: A regression caused the ``queue`` argument to ``Task.retry`` to be + ignored (Issue #1892). + +- **App**: Fixed error message for :meth:`~@Celery.config_from_envvar`. + + Fix contributed by Dmitry Malinovsky. + +- **Canvas**: Chords can now contain a group of other chords (Issue #1921). + +- **Canvas**: Chords can now be combined when using the amqp result backend + (a chord where the callback is also a chord). + +- **Canvas**: Calling ``result.get()`` for a chain task will now complete + even if one of the tasks in the chain is ``ignore_result=True`` + (Issue #1905). + +- **Canvas**: Worker now also logs chord errors. + +- **Canvas**: A chord task raising an exception will now also cause any + errbacks (``link_error``) of the chord callback to be called. + +- **Results**: Reliability improvements to the SQLAlchemy database backend + (Issue #1786). + + Previously the connection from the ``MainProcess`` was improperly + inherited by child processes. + + Fix contributed by Ionel Cristian Mărieș. + +- **Task**: Task callbacks and errbacks are now called using the group + primitive. + +- **Task**: ``Task.apply`` now properly sets ``request.headers`` + (Issue #1874). + +- **Worker**: Fixed ``UnicodeEncodeError`` occurring when worker is started + by `supervisord`. + + Fix contributed by Codeb Fan. + +- **Beat**: No longer attempts to upgrade a newly created database file + (Issue #1923). + +- **Beat**: New setting :setting:`CELERYBEAT_SYNC_EVERY` can be used + to control file sync by specifying the number of tasks to send between + each sync. + + Contributed by Chris Clark. + +- **Commands**: :program:`celery inspect memdump` no longer crashes + if the :mod:`psutil` module is not installed (Issue #1914). + +- **Worker**: Remote control commands now always accept json serialized + messages (Issue #1870). + +- **Worker**: Gossip will now drop any task related events it receives + by mistake (Issue #1882). + + +.. _version-3.1.9: + +3.1.9 +===== +:release-date: 2014-02-10 06:43 P.M UTC +:release-by: Ask Solem + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.12 `. + +- **Prefork pool**: Better handling of exiting child processes. + + Fix contributed by Ionel Cristian Mărieș. + +- **Prefork pool**: Now makes sure all file descriptors are removed + from the hub when a process is cleaned up. + + Fix contributed by Ionel Cristian Mărieș. + +- **New Sphinx extension**: for autodoc documentation of tasks: + :mod:`celery.contrib.sphinx` (Issue #1833). + +- **Django**: Now works with Django 1.7a1. + +- **Task**: Task.backend is now a property that forwards to ``app.backend`` + if no custom backend has been specified for the task (Issue #1821). + +- **Generic init scripts**: Fixed bug in stop command. + + Fix contributed by Rinat Shigapov. + +- **Generic init scripts**: Fixed compatibility with GNU :manpage:`stat`. + + Fix contributed by Paul Kilgo. + +- **Generic init scripts**: Fixed compatibility with the minimal + :program:`dash` shell (Issue #1815).
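The ``queue`` argument to ``Task.retry`` mentioned under 3.1.10 above can be used roughly as follows; this is a sketch only, with a placeholder broker URL and a made-up ``'retries'`` queue:

.. code-block:: python

    import random

    from celery import Celery

    app = Celery('proj', broker='amqp://guest@localhost//')

    @app.task(bind=True, max_retries=3)
    def flaky(self):
        try:
            if random.random() < 0.5:
                raise RuntimeError('transient failure')
            return 'ok'
        except RuntimeError as exc:
            # ``queue`` is forwarded to apply_async when the task is retried,
            # which is the behavior restored by the fix above.
            raise self.retry(exc=exc, countdown=10, queue='retries')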
+ +- **Commands**: The :program:`celery amqp basic.publish` command was not + working properly. + + Fix contributed by Andrey Voronov. + +- **Commands**: Did no longer emit an error message if the pidfile exists + and the process is still alive (Issue #1855). + +- **Commands**: Better error message for missing arguments to preload + options (Issue #1860). + +- **Commands**: :program:`celery -h` did not work because of a bug in the + argument parser (Issue #1849). + +- **Worker**: Improved error message for message decoding errors. + +- **Time**: Now properly parses the `Z` timezone specifier in ISO 8601 date + strings. + + Fix contributed by Martin Davidsson. + +- **Worker**: Now uses the *negotiated* heartbeat value to calculate + how often to run the heartbeat checks. + +- **Beat**: Fixed problem with beat hanging after the first schedule + iteration (Issue #1822). + + Fix contributed by Roger Hu. + +- **Signals**: The header argument to :signal:`before_task_publish` is now + always a dictionary instance so that signal handlers can add headers. + +- **Worker**: A list of message headers is now included in message related + errors. + +.. _version-3.1.8: + +3.1.8 +===== +:release-date: 2014-01-17 10:45 P.M UTC +:release-by: Ask Solem + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.10 `. + + - Now depends on `billiard 3.3.0.14`_. + +.. _`billiard 3.3.0.14`: + https://github.com/celery/billiard/blob/master/CHANGES.txt + +- **Worker**: The event loop was not properly reinitialized at consumer restart + which would force the worker to continue with a closed ``epoll`` instance on + Linux, resulting in a crash. + +- **Events:** Fixed issue with both heartbeats and task events that could + result in the data not being kept in sorted order. + + As a result this would force the worker to log "heartbeat missed" + events even though the remote node was sending heartbeats in a timely manner. + +- **Results:** The pickle serializer no longer converts group results to tuples, + and will keep the original type (*Issue #1750*). + +- **Results:** ``ResultSet.iterate`` is now pending deprecation. + + The method will be deprecated in version 3.2 and removed in version 3.3. + + Use ``result.get(callback=)`` (or ``result.iter_native()`` where available) + instead. + +- **Worker**\|eventlet/gevent: A regression caused ``Ctrl+C`` to be ineffective + for shutdown. + +- **Redis result backend:** Now using a pipeline to store state changes + for improved performance. + + Contributed by Pepijn de Vos. + +- **Redis result backend:** Will now retry storing the result if disconnected. + +- **Worker**\|gossip: Fixed attribute error occurring when another node leaves. + + Fix contributed by Brodie Rao. + +- **Generic init scripts:** Now runs a check at startup to verify + that any configuration scripts are owned by root and that they + are not world/group writeable. + + The init script configuration is a shell script executed by root, + so this is a preventive measure to ensure that users do not + leave this file vulnerable to changes by unprivileged users. + + .. note:: + + Note that upgrading celery will not update the init scripts, + instead you need to manually copy the improved versions from the + source distribution: + https://github.com/celery/celery/tree/3.1/extra/generic-init.d + +- **Commands**: The :program:`celery purge` command now warns that the operation + will delete all tasks and prompts the user for confirmation. 
+ + A new :option:`-f` was added that can be used to disable + interactive mode. + +- **Task**: ``.retry()`` did not raise the value provided in the ``exc`` argument + when called outside of an error context (*Issue #1755*). + +- **Commands:** The :program:`celery multi` command did not forward command + line configuration to the target workers. + + The change means that multi will forward the special ``--`` argument and + configuration content at the end of the arguments line to the specified + workers. + + Example using command-line configuration to set a broker heartbeat + from :program:`celery multi`: + + .. code-block:: bash + + $ celery multi start 1 -c3 -- broker.heartbeat=30 + + Fix contributed by Antoine Legrand. + +- **Canvas:** ``chain.apply_async()`` now properly forwards execution options. + + Fix contributed by Konstantin Podshumok. + +- **Redis result backend:** Now takes ``connection_pool`` argument that can be + used to change the connection pool class/constructor. + +- **Worker:** Now truncates very long arguments and keyword arguments logged by + the pool at debug severity. + +- **Worker:** The worker now closes all open files on :sig:`SIGHUP` (regression) + (*Issue #1768*). + + Fix contributed by Brodie Rao + +- **Worker:** Will no longer accept remote control commands while the + worker startup phase is incomplete (*Issue #1741*). + +- **Commands:** The output of the event dump utility + (:program:`celery events -d`) can now be piped into other commands. + +- **Documentation:** The RabbitMQ installation instructions for OS X was + updated to use modern homebrew practices. + + Contributed by Jon Chen. + +- **Commands:** The :program:`celery inspect conf` utility now works. + +- **Commands:** The :option:`-no-color` argument was not respected by + all commands (*Issue #1799*). + +- **App:** Fixed rare bug with ``autodiscover_tasks()`` (*Issue #1797*). + +- **Distribution:** The sphinx docs will now always add the parent directory + to path so that the current celery source code is used as a basis for + API documentation (*Issue #1782*). + +- **Documentation:** Supervisord examples contained an extraneous '-' in a + `--logfile` argument example. + + Fix contributed by Mohammad Almeer. + +.. _version-3.1.7: + +3.1.7 +===== +:release-date: 2013-12-17 06:00 P.M UTC +:release-by: Ask Solem + +.. _v317-important: + +Important Notes +--------------- + +Init script security improvements +--------------------------------- + +Where the generic init scripts (for ``celeryd``, and ``celerybeat``) before +delegated the responsibility of dropping privileges to the target application, +it will now use ``su`` instead, so that the Python program is not trusted +with superuser privileges. + +This is not in reaction to any known exploit, but it will +limit the possibility of a privilege escalation bug being abused in the +future. + +You have to upgrade the init scripts manually from this directory: +https://github.com/celery/celery/tree/3.1/extra/generic-init.d + +AMQP result backend +~~~~~~~~~~~~~~~~~~~ + +The 3.1 release accidentally left the amqp backend configured to be +non-persistent by default. + +Upgrading from 3.0 would give a "not equivalent" error when attempting to +set or retrieve results for a task. 
That is unless you manually set the +persistence setting:: + + CELERY_RESULT_PERSISTENT = True + +This version restores the previous value so if you already forced +the upgrade by removing the existing exchange you must either +keep the configuration by setting ``CELERY_RESULT_PERSISTENT = False`` +or delete the ``celeryresults`` exchange again. + +Synchronous subtasks +~~~~~~~~~~~~~~~~~~~~ + +Tasks waiting for the result of a subtask will now emit +a :exc:`RuntimeWarning` warning when using the prefork pool, +and in 3.2 this will result in an exception being raised. + +It's not legal for tasks to block by waiting for subtasks +as this is likely to lead to resource starvation and eventually +deadlock when using the prefork pool (see also :ref:`task-synchronous-subtasks`). + +If you really know what you are doing you can avoid the warning (and +the future exception being raised) by moving the operation in a whitelist +block: + +.. code-block:: python + + from celery.result import allow_join_result + + @app.task + def misbehaving(): + result = other_task.delay() + with allow_join_result(): + result.get() + +Note also that if you wait for the result of a subtask in any form +when using the prefork pool you must also disable the pool prefetching +behavior with the worker :ref:`-Ofair option `. + +.. _v317-fixes: + +Fixes +----- + +- Now depends on :ref:`Kombu 3.0.8 `. + +- Now depends on :mod:`billiard` 3.3.0.13 + +- Events: Fixed compatibility with non-standard json libraries + that sends float as :class:`decimal.Decimal` (Issue #1731) + +- Events: State worker objects now always defines attributes: + ``active``, ``processed``, ``loadavg``, ``sw_ident``, ``sw_ver`` + and ``sw_sys``. + +- Worker: Now keeps count of the total number of tasks processed, + not just by type (``all_active_count``). + +- Init scripts: Fixed problem with reading configuration file + when the init script is symlinked to a runlevel (e.g. ``S02celeryd``). + (Issue #1740). + + This also removed a rarely used feature where you can symlink the script + to provide alternative configurations. You instead copy the script + and give it a new name, but perhaps a better solution is to provide + arguments to ``CELERYD_OPTS`` to separate them: + + .. code-block:: bash + + CELERYD_NODES="X1 X2 Y1 Y2" + CELERYD_OPTS="-A:X1 x -A:X2 x -A:Y1 y -A:Y2 y" + +- Fallback chord unlock task is now always called after the chord header + (Issue #1700). + + This means that the unlock task will not be started if there's + an error sending the header. + +- Celery command: Fixed problem with arguments for some control commands. + + Fix contributed by Konstantin Podshumok. + +- Fixed bug in ``utcoffset`` where the offset when in DST would be + completely wrong (Issue #1743). + +- Worker: Errors occurring while attempting to serialize the result of a + task will now cause the task to be marked with failure and a + :class:`kombu.exceptions.EncodingError` error. + + Fix contributed by Ionel Cristian Mărieș. + +- Worker with ``-B`` argument did not properly shut down the beat instance. + +- Worker: The ``%n`` and ``%h`` formats are now also supported by the + :option:`--logfile`, :option:`--pidfile` and :option:`--statedb` arguments. + + Example: + + .. code-block:: bash + + $ celery -A proj worker -n foo@%h --logfile=%n.log --statedb=%n.db + +- Redis/Cache result backends: Will now timeout if keys evicted while trying + to join a chord. 
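For readers unfamiliar with the chord primitive referenced in the chord-related entries above, a minimal sketch (the ``add`` and ``tsum`` tasks and the URLs are illustrative only):

.. code-block:: python

    from celery import Celery, chord

    app = Celery('proj', broker='amqp://guest@localhost//',
                 backend='redis://localhost:6379/0')

    @app.task
    def add(x, y):
        return x + y

    @app.task
    def tsum(numbers):
        return sum(numbers)

    # The header (the group of add tasks) runs first; the callback (tsum)
    # is only unlocked once every task in the header has finished.
    result = chord(add.s(i, i) for i in range(10))(tsum.s())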
+ +- The fallback unlock chord task now raises :exc:`Retry` so that the + retry event is properly logged by the worker. + +- Multi: Will no longer apply Eventlet/gevent monkey patches (Issue #1717). + +- Redis result backend: Now supports UNIX sockets. + + Like the Redis broker transport, the result backend now also supports + using ``redis+socket:///tmp/redis.sock`` URLs. + + Contributed by Alcides Viamontes Esquivel. + +- Events: Events sent by clients were mistaken for worker related events + (Issue #1714). + + For ``events.State`` the tasks now have a ``Task.client`` attribute + that is set when a ``task-sent`` event is being received. + + Also, a client's logical clock is not in sync with the cluster, so + clients live in a "time bubble". For this reason monitors will no + longer attempt to merge with the clock of an event sent by a client; + instead they fake the value by using the current clock with + a skew of -1. + +- Prefork pool: The method used to find terminated processes was flawed + in that it did not also take into account missing popen objects. + +- Canvas: ``group`` and ``chord`` now work with anon signatures as long + as the group/chord object is associated with an app instance (Issue #1744). + + You can pass the app by using ``group(..., app=app)``. + +.. _version-3.1.6: + +3.1.6 +===== +:release-date: 2013-12-02 06:00 P.M UTC +:release-by: Ask Solem + +- Now depends on :mod:`billiard` 3.3.0.10. + +- Now depends on :ref:`Kombu 3.0.7 `. + +- Fixed problem where Mingle caused the worker to hang at startup + (Issue #1686). + +- Beat: Would attempt to drop privileges twice (Issue #1708). + +- Windows: Fixed error with ``geteuid`` not being available (Issue #1676). + +- Tasks can now provide a list of expected error classes (Issue #1682). + + The list should only include errors that the task is expected to raise + during normal operation:: + + @task(throws=(KeyError, HttpNotFound)) + + What happens when an exception is raised depends on the type of error: + + - Expected errors (included in ``Task.throws``) + + Will be logged using severity ``INFO``, and traceback is excluded. + + - Unexpected errors + + Will be logged using severity ``ERROR``, with traceback included. + +- Cache result backend now compatible with Python 3 (Issue #1697). + +- CentOS init script: Now compatible with sys-v style init symlinks. + + Fix contributed by Jonathan Jordan. + +- Events: Fixed problem when task name is not defined (Issue #1710). + + Fix contributed by Mher Movsisyan. + +- Task: Fixed unbound local errors (Issue #1684). + + Fix contributed by Markus Ullmann. + +- Canvas: Now unrolls groups with only one task (optimization) (Issue #1656). + +- Task: Fixed problem with eta and timezones. + + Fix contributed by Alexander Koval. + +- Django: Worker now performs model validation (Issue #1681). + +- Task decorator now emits less confusing errors when used with + incorrect arguments (Issue #1692). + +- Task: New method ``Task.send_event`` can be used to send custom events + to Flower and other monitors. + +- Fixed a compatibility issue with non-abstract task classes. + +- Events from clients now use the new node name format (``gen@``). + +- Fixed rare bug with Callable not being defined at interpreter shutdown + (Issue #1678). + + Fix contributed by Nick Johnson. + +- Fixed Python 2.6 compatibility (Issue #1679). + +.. _version-3.1.5: + +3.1.5 +===== +:release-date: 2013-11-21 06:20 P.M UTC +:release-by: Ask Solem + +- Now depends on :ref:`Kombu 3.0.6 `.
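A hedged sketch of the anonymous-signature change noted under 3.1.7 above; ``'proj.tasks.add'`` is a made-up task name and the broker URL is a placeholder:

.. code-block:: python

    from celery import Celery, group, signature

    app = Celery('proj', broker='amqp://guest@localhost//')

    # Signatures created by name alone are not bound to any app, so the
    # group itself has to be given one, as described above.
    g = group((signature('proj.tasks.add', args=(i, i)) for i in range(4)),
              app=app)
    result = g.apply_async()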
+ +- Now depends on :mod:`billiard` 3.3.0.8 + +- App: ``config_from_object`` is now lazy (Issue #1665). + +- App: ``autodiscover_tasks`` is now lazy. + + Django users should now wrap access to the settings object + in a lambda:: + + app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) + + this ensures that the settings object is not prepared + prematurely. + +- Fixed regression for ``--app`` argument experienced by + some users (Issue #1653). + +- Worker: Now respects the ``--uid`` and ``--gid`` arguments + even if ``--detach`` is not enabled. + +- Beat: Now respects the ``--uid`` and ``--gid`` arguments + even if ``--detach`` is not enabled. + +- Python 3: Fixed unorderable error occuring with the worker ``-B`` + argument enabled. + +- ``celery.VERSION`` is now a named tuple. + +- ``maybe_signature(list)`` is now applied recursively (Issue #1645). + +- ``celery shell`` command: Fixed ``IPython.frontend`` deprecation warning. + +- The default app no longer includes the builtin fixups. + + This fixes a bug where ``celery multi`` would attempt + to load the Django settings module before entering + the target working directory. + +- The Django daemonization tutorial was changed. + + Users no longer have to explicitly export ``DJANGO_SETTINGS_MODULE`` + in :file:`/etc/default/celeryd` when the new project layout is used. + +- Redis result backend: expiry value can now be 0 (Issue #1661). + +- Censoring settings now accounts for non-string keys (Issue #1663). + +- App: New ``autofinalize`` option. + + Apps are automatically finalized when the task registry is accessed. + You can now disable this behavior so that an exception is raised + instead. + + Example: + + .. code-block:: python + + app = Celery(autofinalize=False) + + # raises RuntimeError + tasks = app.tasks + + @app.task + def add(x, y): + return x + y + + # raises RuntimeError + add.delay(2, 2) + + app.finalize() + # no longer raises: + tasks = app.tasks + add.delay(2, 2) + +- The worker did not send monitoring events during shutdown. + +- Worker: Mingle and gossip is now automatically disabled when + used with an unsupported transport (Issue #1664). + +- ``celery`` command: Preload options now supports + the rare ``--opt value`` format (Issue #1668). + +- ``celery`` command: Accidentally removed options + appearing before the subcommand, these are now moved to the end + instead. + +- Worker now properly responds to ``inspect stats`` commands + even if received before startup is complete (Issue #1659). + +- :signal:`task_postrun` is now sent within a finally block, to make + sure the signal is always sent. + +- Beat: Fixed syntax error in string formatting. + + Contributed by nadad. + +- Fixed typos in the documentation. + + Fixes contributed by Loic Bistuer, sunfinite. + +- Nested chains now works properly when constructed using the + ``chain`` type instead of the ``|`` operator (Issue #1656). + +.. _version-3.1.4: + +3.1.4 +===== +:release-date: 2013-11-15 11:40 P.M UTC +:release-by: Ask Solem + +- Now depends on :ref:`Kombu 3.0.5 `. + +- Now depends on :mod:`billiard` 3.3.0.7 + +- Worker accidentally set a default socket timeout of 5 seconds. + +- Django: Fixup now sets the default app so that threads will use + the same app instance (e.g. for manage.py runserver). + +- Worker: Fixed Unicode error crash at startup experienced by some users. + +- Calling ``.apply_async`` on an empty chain now works again (Issue #1650). + +- The ``celery multi show`` command now generates the same arguments + as the start command does. 
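A short sketch of the nested ``chain`` construction mentioned under 3.1.5 above (the ``add`` task and broker URL are illustrative):

.. code-block:: python

    from celery import Celery, chain

    app = Celery('proj', broker='amqp://guest@localhost//')

    @app.task
    def add(x, y):
        return x + y

    # A chain nested inside another chain now behaves like the flat
    # pipeline ``add.s(2, 2) | add.s(4) | add.s(8)``.
    workflow = chain(add.s(2, 2), chain(add.s(4), add.s(8)))
    result = workflow.apply_async()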
+ +- The ``--app`` argument could end up using a module object instead + of an app instance (with a resulting crash). + +- Fixed a syntax error problem in the celerybeat init script. + + Fix contributed by Vsevolod. + +- Tests now passing on PyPy 2.1 and 2.2. + +.. _version-3.1.3: + +3.1.3 +===== +:release-date: 2013-11-13 00:55 A.M UTC +:release-by: Ask Solem + +- Fixed compatibility problem with Python 2.7.0 - 2.7.5 (Issue #1637) + + ``unpack_from`` started supporting ``memoryview`` arguments + in Python 2.7.6. + +- Worker: :option:`-B` argument accidentally closed files used + for logging. + +- Task decorated tasks now keep their docstring (Issue #1636) + +.. _version-3.1.2: + +3.1.2 +===== +:release-date: 2013-11-12 08:00 P.M UTC +:release-by: Ask Solem + +- Now depends on :mod:`billiard` 3.3.0.6 + +- No longer needs the billiard C extension to be installed. + +- The worker silently ignored task errors. + +- Django: Fixed ``ImproperlyConfigured`` error raised + when no database backend specified. + + Fix contributed by j0hnsmith + +- Prefork pool: Now using ``_multiprocessing.read`` with ``memoryview`` + if available. + +- ``close_open_fds`` now uses ``os.closerange`` if available. + +- ``get_fdmax`` now takes value from ``sysconfig`` if possible. + +.. _version-3.1.1: + +3.1.1 +===== +:release-date: 2013-11-11 06:30 P.M UTC +:release-by: Ask Solem + +- Now depends on :mod:`billiard` 3.3.0.4. + +- Python 3: Fixed compatibility issues. + +- Windows: Accidentally showed warning that the billiard C extension + was not installed (Issue #1630). + +- Django: Tutorial updated with a solution that sets a default + :envvar:`DJANGO_SETTINGS_MODULE` so that it doesn't have to be typed + in with the :program:`celery` command. + + Also fixed typos in the tutorial, and added the settings + required to use the Django database backend. + + Thanks to Chris Ward, orarbel. + +- Django: Fixed a problem when using the Django settings in Django 1.6. + +- Django: Fixup should not be applied if the django loader is active. + +- Worker: Fixed attribute error for ``human_write_stats`` when using the + compatibility prefork pool implementation. + +- Worker: Fixed compatibility with billiard without C extension. + +- Inspect.conf: Now supports a ``with_defaults`` argument. + +- Group.restore: The backend argument was not respected. + +.. _version-3.1.0: + +3.1.0 +======= +:release-date: 2013-11-09 11:00 P.M UTC +:release-by: Ask Solem + +See :ref:`whatsnew-3.1`. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..aeb3da0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,53 @@ +Copyright (c) 2009, 2010, 2011, 2012 Ask Solem, and individual contributors. All Rights Reserved. +Copyright (c) 2012-2014 GoPivotal, Inc. All rights reserved. + +Celery is licensed under The BSD License (3 Clause, also known as +the new BSD license). The license is an OSI approved Open Source +license and is GPL-compatible(1). + +The license text can also be found here: +http://www.opensource.org/licenses/BSD-3-Clause + +License +======= + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Ask Solem, nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Ask Solem OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +Documentation License +===================== + +The documentation portion of Celery (the rendered contents of the +"docs" directory of a software distribution or checkout) is supplied +under the Creative Commons Attribution-Noncommercial-Share Alike 3.0 +United States License as described by +http://creativecommons.org/licenses/by-nc-sa/3.0/us/ + +Footnotes +========= +(1) A GPL-compatible license makes it possible to + combine Celery with other software that is released + under the GPL, it does not mean that we're distributing + Celery under the GPL license. The BSD license, unlike the GPL, + let you distribute a modified version without making your + changes open source. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..d282745 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,21 @@ +include CONTRIBUTORS.txt +include Changelog +include LICENSE +include README.rst +include MANIFEST.in +include TODO +include setup.cfg +include setup.py +recursive-include celery *.py +recursive-include docs * +recursive-include extra/bash-completion * +recursive-include extra/centos * +recursive-include extra/generic-init.d * +recursive-include extra/osx * +recursive-include extra/supervisord * +recursive-include extra/systemd * +recursive-include extra/zsh-completion * +recursive-include examples * +recursive-include requirements *.txt +prune *.pyc +prune *.sw* diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..d5b27da --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,455 @@ +Metadata-Version: 1.1 +Name: celery +Version: 3.1.13 +Summary: Distributed Task Queue +Home-page: http://celeryproject.org +Author: Ask Solem +Author-email: ask@celeryproject.org +License: BSD +Description: ================================= + celery - Distributed Task Queue + ================================= + + .. image:: http://cloud.github.com/downloads/celery/celery/celery_128.png + + :Version: 3.1.13 (Cipater) + :Web: http://celeryproject.org/ + :Download: http://pypi.python.org/pypi/celery/ + :Source: http://github.com/celery/celery/ + :Keywords: task queue, job queue, asynchronous, async, rabbitmq, amqp, redis, + python, webhooks, queue, distributed + + -- + + What is a Task Queue? + ===================== + + Task queues are used as a mechanism to distribute work across threads or + machines. + + A task queue's input is a unit of work, called a task, dedicated worker + processes then constantly monitor the queue for new work to perform. + + Celery communicates via messages, usually using a broker + to mediate between clients and workers. 
To initiate a task a client puts a + message on the queue, the broker then delivers the message to a worker. + + A Celery system can consist of multiple workers and brokers, giving way + to high availability and horizontal scaling. + + Celery is a library written in Python, but the protocol can be implemented in + any language. So far there's RCelery_ for the Ruby programming language, and a + `PHP client`, but language interoperability can also be achieved + by using webhooks. + + .. _RCelery: http://leapfrogdevelopment.github.com/rcelery/ + .. _`PHP client`: https://github.com/gjedeer/celery-php + .. _`using webhooks`: + http://docs.celeryproject.org/en/latest/userguide/remote-tasks.html + + What do I need? + =============== + + Celery version 3.0 runs on, + + - Python (2.5, 2.6, 2.7, 3.2, 3.3) + - PyPy (1.8, 1.9) + - Jython (2.5, 2.7). + + This is the last version to support Python 2.5, + and from Celery 3.1, Python 2.6 or later is required. + The last version to support Python 2.4 was Celery series 2.2. + + *Celery* is usually used with a message broker to send and receive messages. + The RabbitMQ, Redis transports are feature complete, + but there's also experimental support for a myriad of other solutions, including + using SQLite for local development. + + *Celery* can run on a single machine, on multiple machines, or even + across datacenters. + + Get Started + =========== + + If this is the first time you're trying to use Celery, or you are + new to Celery 3.0 coming from previous versions then you should read our + getting started tutorials: + + - `First steps with Celery`_ + + Tutorial teaching you the bare minimum needed to get started with Celery. + + - `Next steps`_ + + A more complete overview, showing more features. + + .. _`First steps with Celery`: + http://docs.celeryproject.org/en/latest/getting-started/first-steps-with-celery.html + + .. _`Next steps`: + http://docs.celeryproject.org/en/latest/getting-started/next-steps.html + + Celery is... + ============ + + - **Simple** + + Celery is easy to use and maintain, and does *not need configuration files*. + + It has an active, friendly community you can talk to for support, + including a `mailing-list`_ and and an IRC channel. + + Here's one of the simplest applications you can make:: + + from celery import Celery + + app = Celery('hello', broker='amqp://guest@localhost//') + + @app.task + def hello(): + return 'hello world' + + - **Highly Available** + + Workers and clients will automatically retry in the event + of connection loss or failure, and some brokers support + HA in way of *Master/Master* or *Master/Slave* replication. + + - **Fast** + + A single Celery process can process millions of tasks a minute, + with sub-millisecond round-trip latency (using RabbitMQ, + py-librabbitmq, and optimized settings). + + - **Flexible** + + Almost every part of *Celery* can be extended or used on its own, + Custom pool implementations, serializers, compression schemes, logging, + schedulers, consumers, producers, autoscalers, broker transports and much more. + + It supports... + ============== + + - **Message Transports** + + - RabbitMQ_, Redis_, + - MongoDB_ (experimental), Amazon SQS (experimental), + - CouchDB_ (experimental), SQLAlchemy_ (experimental), + - Django ORM (experimental), `IronMQ`_ + - and more... 
+ + - **Concurrency** + + - Prefork, Eventlet_, gevent_, threads/single threaded + + - **Result Stores** + + - AMQP, Redis + - memcached, MongoDB + - SQLAlchemy, Django ORM + - Apache Cassandra, IronCache + + - **Serialization** + + - *pickle*, *json*, *yaml*, *msgpack*. + - *zlib*, *bzip2* compression. + - Cryptographic message signing. + + .. _`Eventlet`: http://eventlet.net/ + .. _`gevent`: http://gevent.org/ + + .. _RabbitMQ: http://rabbitmq.com + .. _Redis: http://redis.io + .. _MongoDB: http://mongodb.org + .. _Beanstalk: http://kr.github.com/beanstalkd + .. _CouchDB: http://couchdb.apache.org + .. _SQLAlchemy: http://sqlalchemy.org + .. _`IronMQ`: http://iron.io + + Framework Integration + ===================== + + Celery is easy to integrate with web frameworks, some of which even have + integration packages: + + +--------------------+------------------------+ + | `Django`_ | not needed | + +--------------------+------------------------+ + | `Pyramid`_ | `pyramid_celery`_ | + +--------------------+------------------------+ + | `Pylons`_ | `celery-pylons`_ | + +--------------------+------------------------+ + | `Flask`_ | not needed | + +--------------------+------------------------+ + | `web2py`_ | `web2py-celery`_ | + +--------------------+------------------------+ + | `Tornado`_ | `tornado-celery`_ | + +--------------------+------------------------+ + + The integration packages are not strictly necessary, but they can make + development easier, and sometimes they add important hooks like closing + database connections at ``fork``. + + .. _`Django`: http://djangoproject.com/ + .. _`Pylons`: http://pylonshq.com/ + .. _`Flask`: http://flask.pocoo.org/ + .. _`web2py`: http://web2py.com/ + .. _`Bottle`: http://bottlepy.org/ + .. _`Pyramid`: http://docs.pylonsproject.org/en/latest/docs/pyramid.html + .. _`pyramid_celery`: http://pypi.python.org/pypi/pyramid_celery/ + .. _`django-celery`: http://pypi.python.org/pypi/django-celery + .. _`celery-pylons`: http://pypi.python.org/pypi/celery-pylons + .. _`web2py-celery`: http://code.google.com/p/web2py-celery/ + .. _`Tornado`: http://www.tornadoweb.org/ + .. _`tornado-celery`: http://github.com/mher/tornado-celery/ + + .. _celery-documentation: + + Documentation + ============= + + The `latest documentation`_ with user guides, tutorials and API reference + is hosted at Read The Docs. + + .. _`latest documentation`: http://docs.celeryproject.org/en/latest/ + + .. _celery-installation: + + Installation + ============ + + You can install Celery either via the Python Package Index (PyPI) + or from source. + + To install using `pip`,:: + + $ pip install -U Celery + + To install using `easy_install`,:: + + $ easy_install -U Celery + + .. _bundles: + + Bundles + ------- + + Celery also defines a group of bundles that can be used + to install Celery and the dependencies for a given feature. + + You can specify these in your requirements or on the ``pip`` comand-line + by using brackets. Multiple bundles can be specified by separating them by + commas. + :: + + $ pip install "celery[librabbitmq]" + + $ pip install "celery[librabbitmq,redis,auth,msgpack]" + + The following bundles are available: + + Serializers + ~~~~~~~~~~~ + + :celery[auth]: + for using the auth serializer. + + :celery[msgpack]: + for using the msgpack serializer. + + :celery[yaml]: + for using the yaml serializer. + + Concurrency + ~~~~~~~~~~~ + + :celery[eventlet]: + for using the eventlet pool. + + :celery[gevent]: + for using the gevent pool. 
+ + :celery[threads]: + for using the thread pool. + + Transports and Backends + ~~~~~~~~~~~~~~~~~~~~~~~ + + :celery[librabbitmq]: + for using the librabbitmq C library. + + :celery[redis]: + for using Redis as a message transport or as a result backend. + + :celery[mongodb]: + for using MongoDB as a message transport (*experimental*), + or as a result backend (*supported*). + + :celery[sqs]: + for using Amazon SQS as a message transport (*experimental*). + + :celery[memcache]: + for using memcached as a result backend. + + :celery[cassandra]: + for using Apache Cassandra as a result backend. + + :celery[couchdb]: + for using CouchDB as a message transport (*experimental*). + + :celery[couchbase]: + for using CouchBase as a result backend. + + :celery[beanstalk]: + for using Beanstalk as a message transport (*experimental*). + + :celery[zookeeper]: + for using Zookeeper as a message transport. + + :celery[zeromq]: + for using ZeroMQ as a message transport (*experimental*). + + :celery[sqlalchemy]: + for using SQLAlchemy as a message transport (*experimental*), + or as a result backend (*supported*). + + :celery[pyro]: + for using the Pyro4 message transport (*experimental*). + + :celery[slmq]: + for using the SoftLayer Message Queue transport (*experimental*). + + .. _celery-installing-from-source: + + Downloading and installing from source + -------------------------------------- + + Download the latest version of Celery from + http://pypi.python.org/pypi/celery/ + + You can install it by doing the following,:: + + $ tar xvfz celery-0.0.0.tar.gz + $ cd celery-0.0.0 + $ python setup.py build + # python setup.py install + + The last command must be executed as a privileged user if + you are not currently using a virtualenv. + + .. _celery-installing-from-git: + + Using the development version + ----------------------------- + + With pip + ~~~~~~~~ + + The Celery development version also requires the development + versions of ``kombu``, ``amqp`` and ``billiard``. + + You can install the latest snapshot of these using the following + pip commands:: + + $ pip install https://github.com/celery/celery/zipball/master#egg=celery + $ pip install https://github.com/celery/billiard/zipball/master#egg=billiard + $ pip install https://github.com/celery/py-amqp/zipball/master#egg=amqp + $ pip install https://github.com/celery/kombu/zipball/master#egg=kombu + + With git + ~~~~~~~~ + + Please the Contributing section. + + .. _getting-help: + + Getting Help + ============ + + .. _mailing-list: + + Mailing list + ------------ + + For discussions about the usage, development, and future of celery, + please join the `celery-users`_ mailing list. + + .. _`celery-users`: http://groups.google.com/group/celery-users/ + + .. _irc-channel: + + IRC + --- + + Come chat with us on IRC. The **#celery** channel is located at the `Freenode`_ + network. + + .. _`Freenode`: http://freenode.net + + .. _bug-tracker: + + Bug tracker + =========== + + If you have any suggestions, bug reports or annoyances please report them + to our issue tracker at http://github.com/celery/celery/issues/ + + .. _wiki: + + Wiki + ==== + + http://wiki.github.com/celery/celery/ + + .. _contributing-short: + + Contributing + ============ + + Development of `celery` happens at Github: http://github.com/celery/celery + + You are highly encouraged to participate in the development + of `celery`. If you don't like Github (for some reason) you're welcome + to send regular patches. 
+ + Be sure to also read the `Contributing to Celery`_ section in the + documentation. + + .. _`Contributing to Celery`: + http://docs.celeryproject.org/en/master/contributing.html + + .. _license: + + License + ======= + + This software is licensed under the `New BSD License`. See the ``LICENSE`` + file in the top distribution directory for the full license text. + + .. # vim: syntax=rst expandtab tabstop=4 shiftwidth=4 shiftround + + + .. image:: https://d2weczhvl823v0.cloudfront.net/celery/celery/trend.png + :alt: Bitdeli badge + :target: https://bitdeli.com/free + + +Platform: any +Classifier: Development Status :: 5 - Production/Stable +Classifier: License :: OSI Approved :: BSD License +Classifier: Topic :: System :: Distributed Computing +Classifier: Topic :: Software Development :: Object Brokering +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.6 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Programming Language :: Python :: Implementation :: Jython +Classifier: Operating System :: OS Independent +Classifier: Operating System :: POSIX +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: MacOS :: MacOS X diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..2d77569 --- /dev/null +++ b/README.rst @@ -0,0 +1,427 @@ +================================= + celery - Distributed Task Queue +================================= + +.. image:: http://cloud.github.com/downloads/celery/celery/celery_128.png + +:Version: 3.1.13 (Cipater) +:Web: http://celeryproject.org/ +:Download: http://pypi.python.org/pypi/celery/ +:Source: http://github.com/celery/celery/ +:Keywords: task queue, job queue, asynchronous, async, rabbitmq, amqp, redis, + python, webhooks, queue, distributed + +-- + +What is a Task Queue? +===================== + +Task queues are used as a mechanism to distribute work across threads or +machines. + +A task queue's input is a unit of work, called a task, dedicated worker +processes then constantly monitor the queue for new work to perform. + +Celery communicates via messages, usually using a broker +to mediate between clients and workers. To initiate a task a client puts a +message on the queue, the broker then delivers the message to a worker. + +A Celery system can consist of multiple workers and brokers, giving way +to high availability and horizontal scaling. + +Celery is a library written in Python, but the protocol can be implemented in +any language. So far there's RCelery_ for the Ruby programming language, and a +`PHP client`, but language interoperability can also be achieved +by using webhooks. + +.. _RCelery: http://leapfrogdevelopment.github.com/rcelery/ +.. _`PHP client`: https://github.com/gjedeer/celery-php +.. _`using webhooks`: + http://docs.celeryproject.org/en/latest/userguide/remote-tasks.html + +What do I need? +=============== + +Celery version 3.0 runs on, + +- Python (2.5, 2.6, 2.7, 3.2, 3.3) +- PyPy (1.8, 1.9) +- Jython (2.5, 2.7). + +This is the last version to support Python 2.5, +and from Celery 3.1, Python 2.6 or later is required. +The last version to support Python 2.4 was Celery series 2.2. 
+ +*Celery* is usually used with a message broker to send and receive messages. +The RabbitMQ, Redis transports are feature complete, +but there's also experimental support for a myriad of other solutions, including +using SQLite for local development. + +*Celery* can run on a single machine, on multiple machines, or even +across datacenters. + +Get Started +=========== + +If this is the first time you're trying to use Celery, or you are +new to Celery 3.0 coming from previous versions then you should read our +getting started tutorials: + +- `First steps with Celery`_ + + Tutorial teaching you the bare minimum needed to get started with Celery. + +- `Next steps`_ + + A more complete overview, showing more features. + +.. _`First steps with Celery`: + http://docs.celeryproject.org/en/latest/getting-started/first-steps-with-celery.html + +.. _`Next steps`: + http://docs.celeryproject.org/en/latest/getting-started/next-steps.html + +Celery is... +============ + +- **Simple** + + Celery is easy to use and maintain, and does *not need configuration files*. + + It has an active, friendly community you can talk to for support, + including a `mailing-list`_ and and an IRC channel. + + Here's one of the simplest applications you can make:: + + from celery import Celery + + app = Celery('hello', broker='amqp://guest@localhost//') + + @app.task + def hello(): + return 'hello world' + +- **Highly Available** + + Workers and clients will automatically retry in the event + of connection loss or failure, and some brokers support + HA in way of *Master/Master* or *Master/Slave* replication. + +- **Fast** + + A single Celery process can process millions of tasks a minute, + with sub-millisecond round-trip latency (using RabbitMQ, + py-librabbitmq, and optimized settings). + +- **Flexible** + + Almost every part of *Celery* can be extended or used on its own, + Custom pool implementations, serializers, compression schemes, logging, + schedulers, consumers, producers, autoscalers, broker transports and much more. + +It supports... +============== + + - **Message Transports** + + - RabbitMQ_, Redis_, + - MongoDB_ (experimental), Amazon SQS (experimental), + - CouchDB_ (experimental), SQLAlchemy_ (experimental), + - Django ORM (experimental), `IronMQ`_ + - and more... + + - **Concurrency** + + - Prefork, Eventlet_, gevent_, threads/single threaded + + - **Result Stores** + + - AMQP, Redis + - memcached, MongoDB + - SQLAlchemy, Django ORM + - Apache Cassandra, IronCache + + - **Serialization** + + - *pickle*, *json*, *yaml*, *msgpack*. + - *zlib*, *bzip2* compression. + - Cryptographic message signing. + +.. _`Eventlet`: http://eventlet.net/ +.. _`gevent`: http://gevent.org/ + +.. _RabbitMQ: http://rabbitmq.com +.. _Redis: http://redis.io +.. _MongoDB: http://mongodb.org +.. _Beanstalk: http://kr.github.com/beanstalkd +.. _CouchDB: http://couchdb.apache.org +.. _SQLAlchemy: http://sqlalchemy.org +.. 
_`IronMQ`: http://iron.io + +Framework Integration +===================== + +Celery is easy to integrate with web frameworks, some of which even have +integration packages: + + +--------------------+------------------------+ + | `Django`_ | not needed | + +--------------------+------------------------+ + | `Pyramid`_ | `pyramid_celery`_ | + +--------------------+------------------------+ + | `Pylons`_ | `celery-pylons`_ | + +--------------------+------------------------+ + | `Flask`_ | not needed | + +--------------------+------------------------+ + | `web2py`_ | `web2py-celery`_ | + +--------------------+------------------------+ + | `Tornado`_ | `tornado-celery`_ | + +--------------------+------------------------+ + +The integration packages are not strictly necessary, but they can make +development easier, and sometimes they add important hooks like closing +database connections at ``fork``. + +.. _`Django`: http://djangoproject.com/ +.. _`Pylons`: http://pylonshq.com/ +.. _`Flask`: http://flask.pocoo.org/ +.. _`web2py`: http://web2py.com/ +.. _`Bottle`: http://bottlepy.org/ +.. _`Pyramid`: http://docs.pylonsproject.org/en/latest/docs/pyramid.html +.. _`pyramid_celery`: http://pypi.python.org/pypi/pyramid_celery/ +.. _`django-celery`: http://pypi.python.org/pypi/django-celery +.. _`celery-pylons`: http://pypi.python.org/pypi/celery-pylons +.. _`web2py-celery`: http://code.google.com/p/web2py-celery/ +.. _`Tornado`: http://www.tornadoweb.org/ +.. _`tornado-celery`: http://github.com/mher/tornado-celery/ + +.. _celery-documentation: + +Documentation +============= + +The `latest documentation`_ with user guides, tutorials and API reference +is hosted at Read The Docs. + +.. _`latest documentation`: http://docs.celeryproject.org/en/latest/ + +.. _celery-installation: + +Installation +============ + +You can install Celery either via the Python Package Index (PyPI) +or from source. + +To install using `pip`,:: + + $ pip install -U Celery + +To install using `easy_install`,:: + + $ easy_install -U Celery + +.. _bundles: + +Bundles +------- + +Celery also defines a group of bundles that can be used +to install Celery and the dependencies for a given feature. + +You can specify these in your requirements or on the ``pip`` comand-line +by using brackets. Multiple bundles can be specified by separating them by +commas. +:: + + $ pip install "celery[librabbitmq]" + + $ pip install "celery[librabbitmq,redis,auth,msgpack]" + +The following bundles are available: + +Serializers +~~~~~~~~~~~ + +:celery[auth]: + for using the auth serializer. + +:celery[msgpack]: + for using the msgpack serializer. + +:celery[yaml]: + for using the yaml serializer. + +Concurrency +~~~~~~~~~~~ + +:celery[eventlet]: + for using the eventlet pool. + +:celery[gevent]: + for using the gevent pool. + +:celery[threads]: + for using the thread pool. + +Transports and Backends +~~~~~~~~~~~~~~~~~~~~~~~ + +:celery[librabbitmq]: + for using the librabbitmq C library. + +:celery[redis]: + for using Redis as a message transport or as a result backend. + +:celery[mongodb]: + for using MongoDB as a message transport (*experimental*), + or as a result backend (*supported*). + +:celery[sqs]: + for using Amazon SQS as a message transport (*experimental*). + +:celery[memcache]: + for using memcached as a result backend. + +:celery[cassandra]: + for using Apache Cassandra as a result backend. + +:celery[couchdb]: + for using CouchDB as a message transport (*experimental*). + +:celery[couchbase]: + for using CouchBase as a result backend. 
+ +:celery[beanstalk]: + for using Beanstalk as a message transport (*experimental*). + +:celery[zookeeper]: + for using Zookeeper as a message transport. + +:celery[zeromq]: + for using ZeroMQ as a message transport (*experimental*). + +:celery[sqlalchemy]: + for using SQLAlchemy as a message transport (*experimental*), + or as a result backend (*supported*). + +:celery[pyro]: + for using the Pyro4 message transport (*experimental*). + +:celery[slmq]: + for using the SoftLayer Message Queue transport (*experimental*). + +.. _celery-installing-from-source: + +Downloading and installing from source +-------------------------------------- + +Download the latest version of Celery from +http://pypi.python.org/pypi/celery/ + +You can install it by doing the following,:: + + $ tar xvfz celery-0.0.0.tar.gz + $ cd celery-0.0.0 + $ python setup.py build + # python setup.py install + +The last command must be executed as a privileged user if +you are not currently using a virtualenv. + +.. _celery-installing-from-git: + +Using the development version +----------------------------- + +With pip +~~~~~~~~ + +The Celery development version also requires the development +versions of ``kombu``, ``amqp`` and ``billiard``. + +You can install the latest snapshot of these using the following +pip commands:: + + $ pip install https://github.com/celery/celery/zipball/master#egg=celery + $ pip install https://github.com/celery/billiard/zipball/master#egg=billiard + $ pip install https://github.com/celery/py-amqp/zipball/master#egg=amqp + $ pip install https://github.com/celery/kombu/zipball/master#egg=kombu + +With git +~~~~~~~~ + +Please the Contributing section. + +.. _getting-help: + +Getting Help +============ + +.. _mailing-list: + +Mailing list +------------ + +For discussions about the usage, development, and future of celery, +please join the `celery-users`_ mailing list. + +.. _`celery-users`: http://groups.google.com/group/celery-users/ + +.. _irc-channel: + +IRC +--- + +Come chat with us on IRC. The **#celery** channel is located at the `Freenode`_ +network. + +.. _`Freenode`: http://freenode.net + +.. _bug-tracker: + +Bug tracker +=========== + +If you have any suggestions, bug reports or annoyances please report them +to our issue tracker at http://github.com/celery/celery/issues/ + +.. _wiki: + +Wiki +==== + +http://wiki.github.com/celery/celery/ + +.. _contributing-short: + +Contributing +============ + +Development of `celery` happens at Github: http://github.com/celery/celery + +You are highly encouraged to participate in the development +of `celery`. If you don't like Github (for some reason) you're welcome +to send regular patches. + +Be sure to also read the `Contributing to Celery`_ section in the +documentation. + +.. _`Contributing to Celery`: + http://docs.celeryproject.org/en/master/contributing.html + +.. _license: + +License +======= + +This software is licensed under the `New BSD License`. See the ``LICENSE`` +file in the top distribution directory for the full license text. + +.. # vim: syntax=rst expandtab tabstop=4 shiftwidth=4 shiftround + + +.. 
image:: https://d2weczhvl823v0.cloudfront.net/celery/celery/trend.png + :alt: Bitdeli badge + :target: https://bitdeli.com/free + diff --git a/TODO b/TODO new file mode 100644 index 0000000..0bd13b2 --- /dev/null +++ b/TODO @@ -0,0 +1,2 @@ +Please see our Issue Tracker at GitHub: + http://github.com/celery/celery/issues diff --git a/celery/__init__.py b/celery/__init__.py new file mode 100644 index 0000000..957e234 --- /dev/null +++ b/celery/__init__.py @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- +"""Distributed Task Queue""" +# :copyright: (c) 2009 - 2012 Ask Solem and individual contributors, +# All rights reserved. +# :copyright: (c) 2012-2014 GoPivotal, Inc., All rights reserved. +# :license: BSD (3 Clause), see LICENSE for more details. + +from __future__ import absolute_import + +from collections import namedtuple + +version_info_t = namedtuple( + 'version_info_t', ('major', 'minor', 'micro', 'releaselevel', 'serial'), +) + +SERIES = 'Cipater' +VERSION = version_info_t(3, 1, 13, '', '') +__version__ = '{0.major}.{0.minor}.{0.micro}{0.releaselevel}'.format(VERSION) +__author__ = 'Ask Solem' +__contact__ = 'ask@celeryproject.org' +__homepage__ = 'http://celeryproject.org' +__docformat__ = 'restructuredtext' +__all__ = [ + 'Celery', 'bugreport', 'shared_task', 'task', + 'current_app', 'current_task', 'maybe_signature', + 'chain', 'chord', 'chunks', 'group', 'signature', + 'xmap', 'xstarmap', 'uuid', 'version', '__version__', +] +VERSION_BANNER = '{0} ({1})'.format(__version__, SERIES) + +# -eof meta- + +import os +import sys +if os.environ.get('C_IMPDEBUG'): # pragma: no cover + from .five import builtins + real_import = builtins.__import__ + + def debug_import(name, locals=None, globals=None, + fromlist=None, level=-1): + glob = globals or getattr(sys, 'emarfteg_'[::-1])(1).f_globals + importer_name = glob and glob.get('__name__') or 'unknown' + print('-- {0} imports {1}'.format(importer_name, name)) + return real_import(name, locals, globals, fromlist, level) + builtins.__import__ = debug_import + +# This is never executed, but tricks static analyzers (PyDev, PyCharm, +# pylint, etc.) into knowing the types of these symbols, and what +# they contain. +STATICA_HACK = True +globals()['kcah_acitats'[::-1].upper()] = False +if STATICA_HACK: # pragma: no cover + from celery.app import shared_task # noqa + from celery.app.base import Celery # noqa + from celery.app.utils import bugreport # noqa + from celery.app.task import Task # noqa + from celery._state import current_app, current_task # noqa + from celery.canvas import ( # noqa + chain, chord, chunks, group, + signature, maybe_signature, xmap, xstarmap, subtask, + ) + from celery.utils import uuid # noqa + +# Eventlet/gevent patching must happen before importing +# anything else, so these tools must be at top-level. + + +def _find_option_with_arg(argv, short_opts=None, long_opts=None): + """Search argv for option specifying its short and longopt + alternatives. + + Return the value of the option if found. 
+ + """ + for i, arg in enumerate(argv): + if arg.startswith('-'): + if long_opts and arg.startswith('--'): + name, _, val = arg.partition('=') + if name in long_opts: + return val + if short_opts and arg in short_opts: + return argv[i + 1] + raise KeyError('|'.join(short_opts or [] + long_opts or [])) + + +def _patch_eventlet(): + import eventlet + import eventlet.debug + eventlet.monkey_patch() + EVENTLET_DBLOCK = int(os.environ.get('EVENTLET_NOBLOCK', 0)) + if EVENTLET_DBLOCK: + eventlet.debug.hub_blocking_detection(EVENTLET_DBLOCK) + + +def _patch_gevent(): + from gevent import monkey, version_info + monkey.patch_all() + if version_info[0] == 0: # pragma: no cover + # Signals aren't working in gevent versions <1.0, + # and are not monkey patched by patch_all() + from gevent import signal as _gevent_signal + _signal = __import__('signal') + _signal.signal = _gevent_signal + + +def maybe_patch_concurrency(argv=sys.argv, + short_opts=['-P'], long_opts=['--pool'], + patches={'eventlet': _patch_eventlet, + 'gevent': _patch_gevent}): + """With short and long opt alternatives that specify the command line + option to set the pool, this makes sure that anything that needs + to be patched is completed as early as possible. + (e.g. eventlet/gevent monkey patches).""" + try: + pool = _find_option_with_arg(argv, short_opts, long_opts) + except KeyError: + pass + else: + try: + patcher = patches[pool] + except KeyError: + pass + else: + patcher() + # set up eventlet/gevent environments ASAP. + from celery import concurrency + concurrency.get_implementation(pool) + +# Lazy loading +from celery import five + +old_module, new_module = five.recreate_module( # pragma: no cover + __name__, + by_module={ + 'celery.app': ['Celery', 'bugreport', 'shared_task'], + 'celery.app.task': ['Task'], + 'celery._state': ['current_app', 'current_task'], + 'celery.canvas': ['chain', 'chord', 'chunks', 'group', + 'signature', 'maybe_signature', 'subtask', + 'xmap', 'xstarmap'], + 'celery.utils': ['uuid'], + }, + direct={'task': 'celery.task'}, + __package__='celery', __file__=__file__, + __path__=__path__, __doc__=__doc__, __version__=__version__, + __author__=__author__, __contact__=__contact__, + __homepage__=__homepage__, __docformat__=__docformat__, five=five, + VERSION=VERSION, SERIES=SERIES, VERSION_BANNER=VERSION_BANNER, + version_info_t=version_info_t, + maybe_patch_concurrency=maybe_patch_concurrency, + _find_option_with_arg=_find_option_with_arg, +) diff --git a/celery/__main__.py b/celery/__main__.py new file mode 100644 index 0000000..04448e2 --- /dev/null +++ b/celery/__main__.py @@ -0,0 +1,54 @@ +from __future__ import absolute_import + +import sys + +from os.path import basename + +from . 
import maybe_patch_concurrency + +__all__ = ['main'] + +DEPRECATED_FMT = """ +The {old!r} command is deprecated, please use {new!r} instead: + +$ {new_argv} + +""" + + +def _warn_deprecated(new): + print(DEPRECATED_FMT.format( + old=basename(sys.argv[0]), new=new, + new_argv=' '.join([new] + sys.argv[1:])), + ) + + +def main(): + if 'multi' not in sys.argv: + maybe_patch_concurrency() + from celery.bin.celery import main + main() + + +def _compat_worker(): + maybe_patch_concurrency() + _warn_deprecated('celery worker') + from celery.bin.worker import main + main() + + +def _compat_multi(): + _warn_deprecated('celery multi') + from celery.bin.multi import main + main() + + +def _compat_beat(): + maybe_patch_concurrency() + _warn_deprecated('celery beat') + from celery.bin.beat import main + main() + + +if __name__ == '__main__': # pragma: no cover + main() diff --git a/celery/_state.py b/celery/_state.py new file mode 100644 index 0000000..755bb92 --- /dev/null +++ b/celery/_state.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +""" + celery._state + ~~~~~~~~~~~~~~~ + + This is an internal module containing thread state + like the ``current_app``, and ``current_task``. + + This module shouldn't be used directly. + +""" +from __future__ import absolute_import, print_function + +import os +import sys +import threading +import weakref + +from celery.local import Proxy +from celery.utils.threads import LocalStack + +try: + from weakref import WeakSet as AppSet +except ImportError: # XXX Py2.6 + + class AppSet(object): # noqa + + def __init__(self): + self._refs = set() + + def add(self, app): + self._refs.add(weakref.ref(app)) + + def __iter__(self): + dirty = [] + try: + for appref in self._refs: + app = appref() + if app is None: + dirty.append(appref) + else: + yield app + finally: + while dirty: + self._refs.discard(dirty.pop()) + +__all__ = ['set_default_app', 'get_current_app', 'get_current_task', + 'get_current_worker_task', 'current_app', 'current_task', + 'connect_on_app_finalize'] + +#: Global default app used when no current app. +default_app = None + +#: List of all app instances (weakrefs), must not be used directly. +_apps = AppSet() + +#: global set of functions to call whenever a new app is finalized +#: E.g. Shared tasks, and builtin tasks are created +#: by adding callbacks here. +_on_app_finalizers = set() + +_task_join_will_block = False + + +def connect_on_app_finalize(callback): + _on_app_finalizers.add(callback) + return callback + + +def _announce_app_finalized(app): + callbacks = set(_on_app_finalizers) + for callback in callbacks: + callback(app) + + +def _set_task_join_will_block(blocks): + global _task_join_will_block + _task_join_will_block = blocks + + +def task_join_will_block(): + return _task_join_will_block + + +class _TLS(threading.local): + #: Apps with the :attr:`~celery.app.base.BaseApp.set_as_current` attribute + #: sets this, so it will always contain the last instantiated app, + #: and is the default app returned by :func:`app_or_default`. + current_app = None +_tls = _TLS() + +_task_stack = LocalStack() + + +def set_default_app(app): + global default_app + default_app = app + + +def _get_current_app(): + if default_app is None: + #: creates the global fallback app instance. 
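+        #: (it is created with ``set_as_current=False`` below, so this
+        #: fallback never shadows an app instantiated by the user).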
+ from celery.app import Celery + set_default_app(Celery( + 'default', + loader=os.environ.get('CELERY_LOADER') or 'default', + fixups=[], + set_as_current=False, accept_magic_kwargs=True, + )) + return _tls.current_app or default_app + + +def _set_current_app(app): + _tls.current_app = app + + +C_STRICT_APP = os.environ.get('C_STRICT_APP') +if os.environ.get('C_STRICT_APP'): # pragma: no cover + def get_current_app(): + raise Exception('USES CURRENT APP') + import traceback + print('-- USES CURRENT_APP', file=sys.stderr) # noqa+ + traceback.print_stack(file=sys.stderr) + return _get_current_app() +else: + get_current_app = _get_current_app + + +def get_current_task(): + """Currently executing task.""" + return _task_stack.top + + +def get_current_worker_task(): + """Currently executing task, that was applied by the worker. + + This is used to differentiate between the actual task + executed by the worker and any task that was called within + a task (using ``task.__call__`` or ``task.apply``) + + """ + for task in reversed(_task_stack.stack): + if not task.request.called_directly: + return task + + +#: Proxy to current app. +current_app = Proxy(get_current_app) + +#: Proxy to current task. +current_task = Proxy(get_current_task) + + +def _register_app(app): + _apps.add(app) + + +def _get_active_apps(): + return _apps diff --git a/celery/app/__init__.py b/celery/app/__init__.py new file mode 100644 index 0000000..952a874 --- /dev/null +++ b/celery/app/__init__.py @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- +""" + celery.app + ~~~~~~~~~~ + + Celery Application. + +""" +from __future__ import absolute_import + +import os + +from celery.local import Proxy +from celery import _state +from celery._state import ( + get_current_app as current_app, + get_current_task as current_task, + connect_on_app_finalize, set_default_app, _get_active_apps, _task_stack, +) +from celery.utils import gen_task_name + +from .base import Celery, AppPickler + +__all__ = ['Celery', 'AppPickler', 'default_app', 'app_or_default', + 'bugreport', 'enable_trace', 'disable_trace', 'shared_task', + 'set_default_app', 'current_app', 'current_task', + 'push_current_task', 'pop_current_task'] + +#: Proxy always returning the app set as default. +default_app = Proxy(lambda: _state.default_app) + +#: Function returning the app provided or the default app if none. +#: +#: The environment variable :envvar:`CELERY_TRACE_APP` is used to +#: trace app leaks. When enabled an exception is raised if there +#: is no active app. +app_or_default = None + +#: The 'default' loader is the default loader used by old applications. +#: This is deprecated and should no longer be used as it's set too early +#: to be affected by --loader argument. +default_loader = os.environ.get('CELERY_LOADER') or 'default' # XXX + + +#: Function used to push a task to the thread local stack +#: keeping track of the currently executing task. +#: You must remember to pop the task after. +push_current_task = _task_stack.push + +#: Function used to pop a task from the thread local stack +#: keeping track of the currently executing task. 
+pop_current_task = _task_stack.pop + + +def bugreport(app=None): + return (app or current_app()).bugreport() + + +def _app_or_default(app=None): + if app is None: + return _state.get_current_app() + return app + + +def _app_or_default_trace(app=None): # pragma: no cover + from traceback import print_stack + from billiard import current_process + if app is None: + if getattr(_state._tls, 'current_app', None): + print('-- RETURNING TO CURRENT APP --') # noqa+ + print_stack() + return _state._tls.current_app + if current_process()._name == 'MainProcess': + raise Exception('DEFAULT APP') + print('-- RETURNING TO DEFAULT APP --') # noqa+ + print_stack() + return _state.default_app + return app + + +def enable_trace(): + global app_or_default + app_or_default = _app_or_default_trace + + +def disable_trace(): + global app_or_default + app_or_default = _app_or_default + +if os.environ.get('CELERY_TRACE_APP'): # pragma: no cover + enable_trace() +else: + disable_trace() + +App = Celery # XXX Compat + + +def shared_task(*args, **kwargs): + """Create shared tasks (decorator). + Will return a proxy that always takes the task from the current apps + task registry. + + This can be used by library authors to create tasks that will work + for any app environment. + + Example: + + >>> from celery import Celery, shared_task + >>> @shared_task + ... def add(x, y): + ... return x + y + + >>> app1 = Celery(broker='amqp://') + >>> add.app is app1 + True + + >>> app2 = Celery(broker='redis://') + >>> add.app is app2 + + """ + + def create_shared_task(**options): + + def __inner(fun): + name = options.get('name') + # Set as shared task so that unfinalized apps, + # and future apps will load the task. + connect_on_app_finalize( + lambda app: app._task_from_fun(fun, **options) + ) + + # Force all finalized apps to take this task as well. + for app in _get_active_apps(): + if app.finalized: + with app._finalize_mutex: + app._task_from_fun(fun, **options) + + # Return a proxy that always gets the task from the current + # apps task registry. + def task_by_cons(): + app = current_app() + return app.tasks[ + name or gen_task_name(app, fun.__name__, fun.__module__) + ] + return Proxy(task_by_cons) + return __inner + + if len(args) == 1 and callable(args[0]): + return create_shared_task(**kwargs)(args[0]) + return create_shared_task(*args, **kwargs) diff --git a/celery/app/amqp.py b/celery/app/amqp.py new file mode 100644 index 0000000..1d65841 --- /dev/null +++ b/celery/app/amqp.py @@ -0,0 +1,502 @@ +# -*- coding: utf-8 -*- +""" + celery.app.amqp + ~~~~~~~~~~~~~~~ + + Sending and receiving messages using Kombu. + +""" +from __future__ import absolute_import + +import numbers + +from datetime import timedelta +from weakref import WeakValueDictionary + +from kombu import Connection, Consumer, Exchange, Producer, Queue +from kombu.common import Broadcast +from kombu.pools import ProducerPool +from kombu.utils import cached_property, uuid +from kombu.utils.encoding import safe_repr +from kombu.utils.functional import maybe_list + +from celery import signals +from celery.five import items, string_t +from celery.utils.text import indent as textindent +from celery.utils.timeutils import to_utc + +from . import app_or_default +from . import routes as _routes + +__all__ = ['AMQP', 'Queues', 'TaskProducer', 'TaskConsumer'] + +#: Human readable queue declaration. 
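+#: Each entry is rendered with ``str.format`` against a :class:`kombu.Queue`
+#: instance, hence the ``{0.name}``/``{0.exchange.name}`` placeholders.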
+QUEUE_FORMAT = """ +.> {0.name:<16} exchange={0.exchange.name}({0.exchange.type}) \ +key={0.routing_key} +""" + + +class Queues(dict): + """Queue name⇒ declaration mapping. + + :param queues: Initial list/tuple or dict of queues. + :keyword create_missing: By default any unknown queues will be + added automatically, but if disabled + the occurrence of unknown queues + in `wanted` will raise :exc:`KeyError`. + :keyword ha_policy: Default HA policy for queues with none set. + + + """ + #: If set, this is a subset of queues to consume from. + #: The rest of the queues are then used for routing only. + _consume_from = None + + def __init__(self, queues=None, default_exchange=None, + create_missing=True, ha_policy=None, autoexchange=None): + dict.__init__(self) + self.aliases = WeakValueDictionary() + self.default_exchange = default_exchange + self.create_missing = create_missing + self.ha_policy = ha_policy + self.autoexchange = Exchange if autoexchange is None else autoexchange + if isinstance(queues, (tuple, list)): + queues = dict((q.name, q) for q in queues) + for name, q in items(queues or {}): + self.add(q) if isinstance(q, Queue) else self.add_compat(name, **q) + + def __getitem__(self, name): + try: + return self.aliases[name] + except KeyError: + return dict.__getitem__(self, name) + + def __setitem__(self, name, queue): + if self.default_exchange and (not queue.exchange or + not queue.exchange.name): + queue.exchange = self.default_exchange + dict.__setitem__(self, name, queue) + if queue.alias: + self.aliases[queue.alias] = queue + + def __missing__(self, name): + if self.create_missing: + return self.add(self.new_missing(name)) + raise KeyError(name) + + def add(self, queue, **kwargs): + """Add new queue. + + The first argument can either be a :class:`kombu.Queue` instance, + or the name of a queue. If the former the rest of the keyword + arguments are ignored, and options are simply taken from the queue + instance. + + :param queue: :class:`kombu.Queue` instance or name of the queue. + :keyword exchange: (if named) specifies exchange name. + :keyword routing_key: (if named) specifies binding key. + :keyword exchange_type: (if named) specifies type of exchange. + :keyword \*\*options: (if named) Additional declaration options. 
+ + """ + if not isinstance(queue, Queue): + return self.add_compat(queue, **kwargs) + if self.ha_policy: + if queue.queue_arguments is None: + queue.queue_arguments = {} + self._set_ha_policy(queue.queue_arguments) + self[queue.name] = queue + return queue + + def add_compat(self, name, **options): + # docs used to use binding_key as routing key + options.setdefault('routing_key', options.get('binding_key')) + if options['routing_key'] is None: + options['routing_key'] = name + if self.ha_policy is not None: + self._set_ha_policy(options.setdefault('queue_arguments', {})) + q = self[name] = Queue.from_dict(name, **options) + return q + + def _set_ha_policy(self, args): + policy = self.ha_policy + if isinstance(policy, (list, tuple)): + return args.update({'x-ha-policy': 'nodes', + 'x-ha-policy-params': list(policy)}) + args['x-ha-policy'] = policy + + def format(self, indent=0, indent_first=True): + """Format routing table into string for log dumps.""" + active = self.consume_from + if not active: + return '' + info = [QUEUE_FORMAT.strip().format(q) + for _, q in sorted(items(active))] + if indent_first: + return textindent('\n'.join(info), indent) + return info[0] + '\n' + textindent('\n'.join(info[1:]), indent) + + def select_add(self, queue, **kwargs): + """Add new task queue that will be consumed from even when + a subset has been selected using the :option:`-Q` option.""" + q = self.add(queue, **kwargs) + if self._consume_from is not None: + self._consume_from[q.name] = q + return q + + def select(self, include): + """Sets :attr:`consume_from` by selecting a subset of the + currently defined queues. + + :param include: Names of queues to consume from. + Can be iterable or string. + """ + if include: + self._consume_from = dict((name, self[name]) + for name in maybe_list(include)) + select_subset = select # XXX compat + + def deselect(self, exclude): + """Deselect queues so that they will not be consumed from. + + :param exclude: Names of queues to avoid consuming from. + Can be iterable or string. 
+ + """ + if exclude: + exclude = maybe_list(exclude) + if self._consume_from is None: + # using selection + return self.select(k for k in self if k not in exclude) + # using all queues + for queue in exclude: + self._consume_from.pop(queue, None) + select_remove = deselect # XXX compat + + def new_missing(self, name): + return Queue(name, self.autoexchange(name), name) + + @property + def consume_from(self): + if self._consume_from is not None: + return self._consume_from + return self + + +class TaskProducer(Producer): + app = None + auto_declare = False + retry = False + retry_policy = None + utc = True + event_dispatcher = None + send_sent_event = False + + def __init__(self, channel=None, exchange=None, *args, **kwargs): + self.retry = kwargs.pop('retry', self.retry) + self.retry_policy = kwargs.pop('retry_policy', + self.retry_policy or {}) + self.send_sent_event = kwargs.pop('send_sent_event', + self.send_sent_event) + exchange = exchange or self.exchange + self.queues = self.app.amqp.queues # shortcut + self.default_queue = self.app.amqp.default_queue + self._default_mode = self.app.conf.CELERY_DEFAULT_DELIVERY_MODE + super(TaskProducer, self).__init__(channel, exchange, *args, **kwargs) + + def publish_task(self, task_name, task_args=None, task_kwargs=None, + countdown=None, eta=None, task_id=None, group_id=None, + taskset_id=None, # compat alias to group_id + expires=None, exchange=None, exchange_type=None, + event_dispatcher=None, retry=None, retry_policy=None, + queue=None, now=None, retries=0, chord=None, + callbacks=None, errbacks=None, routing_key=None, + serializer=None, delivery_mode=None, compression=None, + reply_to=None, time_limit=None, soft_time_limit=None, + declare=None, headers=None, + send_before_publish=signals.before_task_publish.send, + before_receivers=signals.before_task_publish.receivers, + send_after_publish=signals.after_task_publish.send, + after_receivers=signals.after_task_publish.receivers, + send_task_sent=signals.task_sent.send, # XXX deprecated + sent_receivers=signals.task_sent.receivers, + **kwargs): + """Send task message.""" + retry = self.retry if retry is None else retry + headers = {} if headers is None else headers + + qname = queue + if queue is None and exchange is None: + queue = self.default_queue + if queue is not None: + if isinstance(queue, string_t): + qname, queue = queue, self.queues[queue] + else: + qname = queue.name + exchange = exchange or queue.exchange.name + routing_key = routing_key or queue.routing_key + if declare is None and queue and not isinstance(queue, Broadcast): + declare = [queue] + if delivery_mode is None: + delivery_mode = self._default_mode + + # merge default and custom policy + retry = self.retry if retry is None else retry + _rp = (dict(self.retry_policy, **retry_policy) if retry_policy + else self.retry_policy) + task_id = task_id or uuid() + task_args = task_args or [] + task_kwargs = task_kwargs or {} + if not isinstance(task_args, (list, tuple)): + raise ValueError('task args must be a list or tuple') + if not isinstance(task_kwargs, dict): + raise ValueError('task kwargs must be a dictionary') + if countdown: # Convert countdown to ETA. 
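+            # ``countdown`` is a relative offset in seconds: it is converted
+            # into an absolute ``eta`` here and serialized to an ISO-8601
+            # string further below.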
+ now = now or self.app.now() + eta = now + timedelta(seconds=countdown) + if self.utc: + eta = to_utc(eta).astimezone(self.app.timezone) + if isinstance(expires, numbers.Real): + now = now or self.app.now() + expires = now + timedelta(seconds=expires) + if self.utc: + expires = to_utc(expires).astimezone(self.app.timezone) + eta = eta and eta.isoformat() + expires = expires and expires.isoformat() + + body = { + 'task': task_name, + 'id': task_id, + 'args': task_args, + 'kwargs': task_kwargs, + 'retries': retries or 0, + 'eta': eta, + 'expires': expires, + 'utc': self.utc, + 'callbacks': callbacks, + 'errbacks': errbacks, + 'timelimit': (time_limit, soft_time_limit), + 'taskset': group_id or taskset_id, + 'chord': chord, + } + + if before_receivers: + send_before_publish( + sender=task_name, body=body, + exchange=exchange, + routing_key=routing_key, + declare=declare, + headers=headers, + properties=kwargs, + retry_policy=retry_policy, + ) + + self.publish( + body, + exchange=exchange, routing_key=routing_key, + serializer=serializer or self.serializer, + compression=compression or self.compression, + headers=headers, + retry=retry, retry_policy=_rp, + reply_to=reply_to, + correlation_id=task_id, + delivery_mode=delivery_mode, declare=declare, + **kwargs + ) + + if after_receivers: + send_after_publish(sender=task_name, body=body, + exchange=exchange, routing_key=routing_key) + + if sent_receivers: # XXX deprecated + send_task_sent(sender=task_name, task_id=task_id, + task=task_name, args=task_args, + kwargs=task_kwargs, eta=eta, + taskset=group_id or taskset_id) + if self.send_sent_event: + evd = event_dispatcher or self.event_dispatcher + exname = exchange or self.exchange + if isinstance(exname, Exchange): + exname = exname.name + evd.publish( + 'task-sent', + { + 'uuid': task_id, + 'name': task_name, + 'args': safe_repr(task_args), + 'kwargs': safe_repr(task_kwargs), + 'retries': retries, + 'eta': eta, + 'expires': expires, + 'queue': qname, + 'exchange': exname, + 'routing_key': routing_key, + }, + self, retry=retry, retry_policy=retry_policy, + ) + return task_id + delay_task = publish_task # XXX Compat + + @cached_property + def event_dispatcher(self): + # We call Dispatcher.publish with a custom producer + # so don't need the dispatcher to be "enabled". 
+ return self.app.events.Dispatcher(enabled=False) + + +class TaskPublisher(TaskProducer): + """Deprecated version of :class:`TaskProducer`.""" + + def __init__(self, channel=None, exchange=None, *args, **kwargs): + self.app = app_or_default(kwargs.pop('app', self.app)) + self.retry = kwargs.pop('retry', self.retry) + self.retry_policy = kwargs.pop('retry_policy', + self.retry_policy or {}) + exchange = exchange or self.exchange + if not isinstance(exchange, Exchange): + exchange = Exchange(exchange, + kwargs.pop('exchange_type', 'direct')) + self.queues = self.app.amqp.queues # shortcut + super(TaskPublisher, self).__init__(channel, exchange, *args, **kwargs) + + +class TaskConsumer(Consumer): + app = None + + def __init__(self, channel, queues=None, app=None, accept=None, **kw): + self.app = app or self.app + if accept is None: + accept = self.app.conf.CELERY_ACCEPT_CONTENT + super(TaskConsumer, self).__init__( + channel, + queues or list(self.app.amqp.queues.consume_from.values()), + accept=accept, + **kw + ) + + +class AMQP(object): + Connection = Connection + Consumer = Consumer + + #: compat alias to Connection + BrokerConnection = Connection + + producer_cls = TaskProducer + consumer_cls = TaskConsumer + queues_cls = Queues + + #: Cached and prepared routing table. + _rtable = None + + #: Underlying producer pool instance automatically + #: set by the :attr:`producer_pool`. + _producer_pool = None + + # Exchange class/function used when defining automatic queues. + # E.g. you can use ``autoexchange = lambda n: None`` to use the + # amqp default exchange, which is a shortcut to bypass routing + # and instead send directly to the queue named in the routing key. + autoexchange = None + + def __init__(self, app): + self.app = app + + def flush_routes(self): + self._rtable = _routes.prepare(self.app.conf.CELERY_ROUTES) + + def Queues(self, queues, create_missing=None, ha_policy=None, + autoexchange=None): + """Create new :class:`Queues` instance, using queue defaults + from the current configuration.""" + conf = self.app.conf + if create_missing is None: + create_missing = conf.CELERY_CREATE_MISSING_QUEUES + if ha_policy is None: + ha_policy = conf.CELERY_QUEUE_HA_POLICY + if not queues and conf.CELERY_DEFAULT_QUEUE: + queues = (Queue(conf.CELERY_DEFAULT_QUEUE, + exchange=self.default_exchange, + routing_key=conf.CELERY_DEFAULT_ROUTING_KEY), ) + autoexchange = (self.autoexchange if autoexchange is None + else autoexchange) + return self.queues_cls( + queues, self.default_exchange, create_missing, + ha_policy, autoexchange, + ) + + def Router(self, queues=None, create_missing=None): + """Return the current task router.""" + return _routes.Router(self.routes, queues or self.queues, + self.app.either('CELERY_CREATE_MISSING_QUEUES', + create_missing), app=self.app) + + @cached_property + def TaskConsumer(self): + """Return consumer configured to consume from the queues + we are configured for (``app.amqp.queues.consume_from``).""" + return self.app.subclass_with_self(self.consumer_cls, + reverse='amqp.TaskConsumer') + get_task_consumer = TaskConsumer # XXX compat + + @cached_property + def TaskProducer(self): + """Return publisher used to send tasks. + + You should use `app.send_task` instead. 
+ + """ + conf = self.app.conf + return self.app.subclass_with_self( + self.producer_cls, + reverse='amqp.TaskProducer', + exchange=self.default_exchange, + routing_key=conf.CELERY_DEFAULT_ROUTING_KEY, + serializer=conf.CELERY_TASK_SERIALIZER, + compression=conf.CELERY_MESSAGE_COMPRESSION, + retry=conf.CELERY_TASK_PUBLISH_RETRY, + retry_policy=conf.CELERY_TASK_PUBLISH_RETRY_POLICY, + send_sent_event=conf.CELERY_SEND_TASK_SENT_EVENT, + utc=conf.CELERY_ENABLE_UTC, + ) + TaskPublisher = TaskProducer # compat + + @cached_property + def default_queue(self): + return self.queues[self.app.conf.CELERY_DEFAULT_QUEUE] + + @cached_property + def queues(self): + """Queue name⇒ declaration mapping.""" + return self.Queues(self.app.conf.CELERY_QUEUES) + + @queues.setter # noqa + def queues(self, queues): + return self.Queues(queues) + + @property + def routes(self): + if self._rtable is None: + self.flush_routes() + return self._rtable + + @cached_property + def router(self): + return self.Router() + + @property + def producer_pool(self): + if self._producer_pool is None: + self._producer_pool = ProducerPool( + self.app.pool, + limit=self.app.pool.limit, + Producer=self.TaskProducer, + ) + return self._producer_pool + publisher_pool = producer_pool # compat alias + + @cached_property + def default_exchange(self): + return Exchange(self.app.conf.CELERY_DEFAULT_EXCHANGE, + self.app.conf.CELERY_DEFAULT_EXCHANGE_TYPE) diff --git a/celery/app/annotations.py b/celery/app/annotations.py new file mode 100644 index 0000000..62ee2e7 --- /dev/null +++ b/celery/app/annotations.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +""" + celery.app.annotations + ~~~~~~~~~~~~~~~~~~~~~~ + + Annotations is a nice term for moneky patching + task classes in the configuration. + + This prepares and performs the annotations in the + :setting:`CELERY_ANNOTATIONS` setting. + +""" +from __future__ import absolute_import + +from celery.five import string_t +from celery.utils.functional import firstmethod, mlazy +from celery.utils.imports import instantiate + +_first_match = firstmethod('annotate') +_first_match_any = firstmethod('annotate_any') + +__all__ = ['MapAnnotation', 'prepare', 'resolve_all'] + + +class MapAnnotation(dict): + + def annotate_any(self): + try: + return dict(self['*']) + except KeyError: + pass + + def annotate(self, task): + try: + return dict(self[task.name]) + except KeyError: + pass + + +def prepare(annotations): + """Expands the :setting:`CELERY_ANNOTATIONS` setting.""" + + def expand_annotation(annotation): + if isinstance(annotation, dict): + return MapAnnotation(annotation) + elif isinstance(annotation, string_t): + return mlazy(instantiate, annotation) + return annotation + + if annotations is None: + return () + elif not isinstance(annotations, (list, tuple)): + annotations = (annotations, ) + return [expand_annotation(anno) for anno in annotations] + + +def resolve_all(anno, task): + return (x for x in (_first_match(anno, task), _first_match_any(anno)) if x) diff --git a/celery/app/base.py b/celery/app/base.py new file mode 100644 index 0000000..274e391 --- /dev/null +++ b/celery/app/base.py @@ -0,0 +1,666 @@ +# -*- coding: utf-8 -*- +""" + celery.app.base + ~~~~~~~~~~~~~~~ + + Actual App instance implementation. 
+ +""" +from __future__ import absolute_import + +import os +import threading +import warnings + +from collections import defaultdict, deque +from copy import deepcopy +from operator import attrgetter + +from amqp import promise +from billiard.util import register_after_fork +from kombu.clocks import LamportClock +from kombu.common import oid_from +from kombu.utils import cached_property, uuid + +from celery import platforms +from celery import signals +from celery._state import ( + _task_stack, get_current_app, _set_current_app, set_default_app, + _register_app, get_current_worker_task, connect_on_app_finalize, + _announce_app_finalized, +) +from celery.exceptions import AlwaysEagerIgnored, ImproperlyConfigured +from celery.five import values +from celery.loaders import get_loader_cls +from celery.local import PromiseProxy, maybe_evaluate +from celery.utils.functional import first, maybe_list +from celery.utils.imports import instantiate, symbol_by_name +from celery.utils.objects import FallbackContext, mro_lookup + +from .annotations import prepare as prepare_annotations +from .defaults import DEFAULTS, find_deprecated_settings +from .registry import TaskRegistry +from .utils import ( + AppPickler, Settings, bugreport, _unpickle_app, _unpickle_app_v2, appstr, +) + +# Load all builtin tasks +from . import builtins # noqa + +__all__ = ['Celery'] + +_EXECV = os.environ.get('FORKED_BY_MULTIPROCESSING') +BUILTIN_FIXUPS = frozenset([ + 'celery.fixups.django:fixup', +]) + +ERR_ENVVAR_NOT_SET = """\ +The environment variable {0!r} is not set, +and as such the configuration could not be loaded. +Please set this variable and make it point to +a configuration module.""" + +_after_fork_registered = False + + +def app_has_custom(app, attr): + return mro_lookup(app.__class__, attr, stop=(Celery, object), + monkey_patched=[__name__]) + + +def _unpickle_appattr(reverse_name, args): + """Given an attribute name and a list of args, gets + the attribute from the current app and calls it.""" + return get_current_app()._rgetattr(reverse_name)(*args) + + +def _global_after_fork(obj): + # Previously every app would call: + # `register_after_fork(app, app._after_fork)` + # but this created a leak as `register_after_fork` stores concrete object + # references and once registered an object cannot be removed without + # touching and iterating over the private afterfork registry list. 
+ # + # See Issue #1949 + from celery import _state + from multiprocessing import util as mputil + for app in _state._apps: + try: + app._after_fork(obj) + except Exception as exc: + if mputil._logger: + mputil._logger.info( + 'after forker raised exception: %r', exc, exc_info=1) + + +def _ensure_after_fork(): + global _after_fork_registered + _after_fork_registered = True + register_after_fork(_global_after_fork, _global_after_fork) + + +class Celery(object): + #: This is deprecated, use :meth:`reduce_keys` instead + Pickler = AppPickler + + SYSTEM = platforms.SYSTEM + IS_OSX, IS_WINDOWS = platforms.IS_OSX, platforms.IS_WINDOWS + + amqp_cls = 'celery.app.amqp:AMQP' + backend_cls = None + events_cls = 'celery.events:Events' + loader_cls = 'celery.loaders.app:AppLoader' + log_cls = 'celery.app.log:Logging' + control_cls = 'celery.app.control:Control' + task_cls = 'celery.app.task:Task' + registry_cls = TaskRegistry + _fixups = None + _pool = None + builtin_fixups = BUILTIN_FIXUPS + + def __init__(self, main=None, loader=None, backend=None, + amqp=None, events=None, log=None, control=None, + set_as_current=True, accept_magic_kwargs=False, + tasks=None, broker=None, include=None, changes=None, + config_source=None, fixups=None, task_cls=None, + autofinalize=True, **kwargs): + self.clock = LamportClock() + self.main = main + self.amqp_cls = amqp or self.amqp_cls + self.events_cls = events or self.events_cls + self.loader_cls = loader or self.loader_cls + self.log_cls = log or self.log_cls + self.control_cls = control or self.control_cls + self.task_cls = task_cls or self.task_cls + self.set_as_current = set_as_current + self.registry_cls = symbol_by_name(self.registry_cls) + self.accept_magic_kwargs = accept_magic_kwargs + self.user_options = defaultdict(set) + self.steps = defaultdict(set) + self.autofinalize = autofinalize + + self.configured = False + self._config_source = config_source + self._pending_defaults = deque() + + self.finalized = False + self._finalize_mutex = threading.Lock() + self._pending = deque() + self._tasks = tasks + if not isinstance(self._tasks, TaskRegistry): + self._tasks = TaskRegistry(self._tasks or {}) + + # If the class defins a custom __reduce_args__ we need to use + # the old way of pickling apps, which is pickling a list of + # args instead of the new way that pickles a dict of keywords. + self._using_v1_reduce = app_has_custom(self, '__reduce_args__') + + # these options are moved to the config to + # simplify pickling of the app object. + self._preconf = changes or {} + if broker: + self._preconf['BROKER_URL'] = broker + if backend: + self._preconf['CELERY_RESULT_BACKEND'] = backend + if include: + self._preconf['CELERY_IMPORTS'] = include + + # - Apply fixups. + self.fixups = set(self.builtin_fixups) if fixups is None else fixups + # ...store fixup instances in _fixups to keep weakrefs alive. 
+ self._fixups = [symbol_by_name(fixup)(self) for fixup in self.fixups] + + if self.set_as_current: + self.set_current() + + self.on_init() + _register_app(self) + + def set_current(self): + _set_current_app(self) + + def set_default(self): + set_default_app(self) + + def __enter__(self): + return self + + def __exit__(self, *exc_info): + self.close() + + def close(self): + self._maybe_close_pool() + + def on_init(self): + """Optional callback called at init.""" + pass + + def start(self, argv=None): + return instantiate( + 'celery.bin.celery:CeleryCommand', + app=self).execute_from_commandline(argv) + + def worker_main(self, argv=None): + return instantiate( + 'celery.bin.worker:worker', + app=self).execute_from_commandline(argv) + + def task(self, *args, **opts): + """Creates new task class from any callable.""" + if _EXECV and not opts.get('_force_evaluate'): + # When using execv the task in the original module will point to a + # different app, so doing things like 'add.request' will point to + # a differnt task instance. This makes sure it will always use + # the task instance from the current app. + # Really need a better solution for this :( + from . import shared_task + return shared_task(*args, _force_evaluate=True, **opts) + + def inner_create_task_cls(shared=True, filter=None, **opts): + _filt = filter # stupid 2to3 + + def _create_task_cls(fun): + if shared: + cons = lambda app: app._task_from_fun(fun, **opts) + cons.__name__ = fun.__name__ + connect_on_app_finalize(cons) + if self.accept_magic_kwargs: # compat mode + task = self._task_from_fun(fun, **opts) + if filter: + task = filter(task) + return task + + if self.finalized or opts.get('_force_evaluate'): + ret = self._task_from_fun(fun, **opts) + else: + # return a proxy object that evaluates on first use + ret = PromiseProxy(self._task_from_fun, (fun, ), opts, + __doc__=fun.__doc__) + self._pending.append(ret) + if _filt: + return _filt(ret) + return ret + + return _create_task_cls + + if len(args) == 1: + if callable(args[0]): + return inner_create_task_cls(**opts)(*args) + raise TypeError('argument 1 to @task() must be a callable') + if args: + raise TypeError( + '@task() takes exactly 1 argument ({0} given)'.format( + sum([len(args), len(opts)]))) + return inner_create_task_cls(**opts) + + def _task_from_fun(self, fun, **options): + if not self.finalized and not self.autofinalize: + raise RuntimeError('Contract breach: app not finalized') + base = options.pop('base', None) or self.Task + bind = options.pop('bind', False) + + T = type(fun.__name__, (base, ), dict({ + 'app': self, + 'accept_magic_kwargs': False, + 'run': fun if bind else staticmethod(fun), + '_decorated': True, + '__doc__': fun.__doc__, + '__module__': fun.__module__, + '__wrapped__': fun}, **options))() + task = self._tasks[T.name] # return global instance. 
+ return task + + def finalize(self, auto=False): + with self._finalize_mutex: + if not self.finalized: + if auto and not self.autofinalize: + raise RuntimeError('Contract breach: app not finalized') + self.finalized = True + _announce_app_finalized(self) + + pending = self._pending + while pending: + maybe_evaluate(pending.popleft()) + + for task in values(self._tasks): + task.bind(self) + + def add_defaults(self, fun): + if not callable(fun): + d, fun = fun, lambda: d + if self.configured: + return self.conf.add_defaults(fun()) + self._pending_defaults.append(fun) + + def config_from_object(self, obj, silent=False, force=False): + self._config_source = obj + if force or self.configured: + del(self.conf) + return self.loader.config_from_object(obj, silent=silent) + + def config_from_envvar(self, variable_name, silent=False, force=False): + module_name = os.environ.get(variable_name) + if not module_name: + if silent: + return False + raise ImproperlyConfigured( + ERR_ENVVAR_NOT_SET.format(variable_name)) + return self.config_from_object(module_name, silent=silent, force=force) + + def config_from_cmdline(self, argv, namespace='celery'): + self.conf.update(self.loader.cmdline_config_parser(argv, namespace)) + + def setup_security(self, allowed_serializers=None, key=None, cert=None, + store=None, digest='sha1', serializer='json'): + from celery.security import setup_security + return setup_security(allowed_serializers, key, cert, + store, digest, serializer, app=self) + + def autodiscover_tasks(self, packages, related_name='tasks', force=False): + if force: + return self._autodiscover_tasks(packages, related_name) + signals.import_modules.connect(promise( + self._autodiscover_tasks, (packages, related_name), + ), weak=False, sender=self) + + def _autodiscover_tasks(self, packages, related_name='tasks', **kwargs): + # argument may be lazy + packages = packages() if callable(packages) else packages + self.loader.autodiscover_tasks(packages, related_name) + + def send_task(self, name, args=None, kwargs=None, countdown=None, + eta=None, task_id=None, producer=None, connection=None, + router=None, result_cls=None, expires=None, + publisher=None, link=None, link_error=None, + add_to_parent=True, reply_to=None, **options): + task_id = task_id or uuid() + producer = producer or publisher # XXX compat + router = router or self.amqp.router + conf = self.conf + if conf.CELERY_ALWAYS_EAGER: # pragma: no cover + warnings.warn(AlwaysEagerIgnored( + 'CELERY_ALWAYS_EAGER has no effect on send_task', + ), stacklevel=2) + options = router.route(options, name, args, kwargs) + if connection: + producer = self.amqp.TaskProducer(connection) + with self.producer_or_acquire(producer) as P: + self.backend.on_task_call(P, task_id) + task_id = P.publish_task( + name, args, kwargs, countdown=countdown, eta=eta, + task_id=task_id, expires=expires, + callbacks=maybe_list(link), errbacks=maybe_list(link_error), + reply_to=reply_to or self.oid, **options + ) + result = (result_cls or self.AsyncResult)(task_id) + if add_to_parent: + parent = get_current_worker_task() + if parent: + parent.add_trail(result) + return result + + def connection(self, hostname=None, userid=None, password=None, + virtual_host=None, port=None, ssl=None, + connect_timeout=None, transport=None, + transport_options=None, heartbeat=None, + login_method=None, failover_strategy=None, **kwargs): + conf = self.conf + return self.amqp.Connection( + hostname or conf.BROKER_URL, + userid or conf.BROKER_USER, + password or conf.BROKER_PASSWORD, + 
virtual_host or conf.BROKER_VHOST, + port or conf.BROKER_PORT, + transport=transport or conf.BROKER_TRANSPORT, + ssl=self.either('BROKER_USE_SSL', ssl), + heartbeat=heartbeat, + login_method=login_method or conf.BROKER_LOGIN_METHOD, + failover_strategy=( + failover_strategy or conf.BROKER_FAILOVER_STRATEGY + ), + transport_options=dict( + conf.BROKER_TRANSPORT_OPTIONS, **transport_options or {} + ), + connect_timeout=self.either( + 'BROKER_CONNECTION_TIMEOUT', connect_timeout + ), + ) + broker_connection = connection + + def _acquire_connection(self, pool=True): + """Helper for :meth:`connection_or_acquire`.""" + if pool: + return self.pool.acquire(block=True) + return self.connection() + + def connection_or_acquire(self, connection=None, pool=True, *_, **__): + return FallbackContext(connection, self._acquire_connection, pool=pool) + default_connection = connection_or_acquire # XXX compat + + def producer_or_acquire(self, producer=None): + return FallbackContext( + producer, self.amqp.producer_pool.acquire, block=True, + ) + default_producer = producer_or_acquire # XXX compat + + def prepare_config(self, c): + """Prepare configuration before it is merged with the defaults.""" + return find_deprecated_settings(c) + + def now(self): + return self.loader.now(utc=self.conf.CELERY_ENABLE_UTC) + + def mail_admins(self, subject, body, fail_silently=False): + if self.conf.ADMINS: + to = [admin_email for _, admin_email in self.conf.ADMINS] + return self.loader.mail_admins( + subject, body, fail_silently, to=to, + sender=self.conf.SERVER_EMAIL, + host=self.conf.EMAIL_HOST, + port=self.conf.EMAIL_PORT, + user=self.conf.EMAIL_HOST_USER, + password=self.conf.EMAIL_HOST_PASSWORD, + timeout=self.conf.EMAIL_TIMEOUT, + use_ssl=self.conf.EMAIL_USE_SSL, + use_tls=self.conf.EMAIL_USE_TLS, + ) + + def select_queues(self, queues=None): + return self.amqp.queues.select(queues) + + def either(self, default_key, *values): + """Fallback to the value of a configuration key if none of the + `*values` are true.""" + return first(None, values) or self.conf.get(default_key) + + def bugreport(self): + return bugreport(self) + + def _get_backend(self): + from celery.backends import get_backend_by_url + backend, url = get_backend_by_url( + self.backend_cls or self.conf.CELERY_RESULT_BACKEND, + self.loader) + return backend(app=self, url=url) + + def on_configure(self): + """Callback calld when the app loads configuration""" + pass + + def _get_config(self): + self.on_configure() + if self._config_source: + self.loader.config_from_object(self._config_source) + defaults = dict(deepcopy(DEFAULTS), **self._preconf) + self.configured = True + s = Settings({}, [self.prepare_config(self.loader.conf), + defaults]) + # load lazy config dict initializers. 
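+        # (these callables were queued by ``add_defaults()`` calls made
+        # before the app was configured).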
+ pending = self._pending_defaults + while pending: + s.add_defaults(maybe_evaluate(pending.popleft()())) + return s + + def _after_fork(self, obj_): + self._maybe_close_pool() + + def _maybe_close_pool(self): + if self._pool: + self._pool.force_close_all() + self._pool = None + amqp = self.__dict__.get('amqp') + if amqp is not None and amqp._producer_pool is not None: + amqp._producer_pool.force_close_all() + amqp._producer_pool = None + + def signature(self, *args, **kwargs): + kwargs['app'] = self + return self.canvas.signature(*args, **kwargs) + + def create_task_cls(self): + """Creates a base task class using default configuration + taken from this app.""" + return self.subclass_with_self( + self.task_cls, name='Task', attribute='_app', + keep_reduce=True, abstract=True, + ) + + def subclass_with_self(self, Class, name=None, attribute='app', + reverse=None, keep_reduce=False, **kw): + """Subclass an app-compatible class by setting its app attribute + to be this app instance. + + App-compatible means that the class has a class attribute that + provides the default app it should use, e.g. + ``class Foo: app = None``. + + :param Class: The app-compatible class to subclass. + :keyword name: Custom name for the target class. + :keyword attribute: Name of the attribute holding the app, + default is 'app'. + + """ + Class = symbol_by_name(Class) + reverse = reverse if reverse else Class.__name__ + + def __reduce__(self): + return _unpickle_appattr, (reverse, self.__reduce_args__()) + + attrs = dict({attribute: self}, __module__=Class.__module__, + __doc__=Class.__doc__, **kw) + if not keep_reduce: + attrs['__reduce__'] = __reduce__ + + return type(name or Class.__name__, (Class, ), attrs) + + def _rgetattr(self, path): + return attrgetter(path)(self) + + def __repr__(self): + return '<{0} {1}>'.format(type(self).__name__, appstr(self)) + + def __reduce__(self): + if self._using_v1_reduce: + return self.__reduce_v1__() + return (_unpickle_app_v2, (self.__class__, self.__reduce_keys__())) + + def __reduce_v1__(self): + # Reduce only pickles the configuration changes, + # so the default configuration doesn't have to be passed + # between processes. 
+ return ( + _unpickle_app, + (self.__class__, self.Pickler) + self.__reduce_args__(), + ) + + def __reduce_keys__(self): + """Return keyword arguments used to reconstruct the object + when unpickling.""" + return { + 'main': self.main, + 'changes': self.conf.changes, + 'loader': self.loader_cls, + 'backend': self.backend_cls, + 'amqp': self.amqp_cls, + 'events': self.events_cls, + 'log': self.log_cls, + 'control': self.control_cls, + 'accept_magic_kwargs': self.accept_magic_kwargs, + 'fixups': self.fixups, + 'config_source': self._config_source, + 'task_cls': self.task_cls, + } + + def __reduce_args__(self): + """Deprecated method, please use :meth:`__reduce_keys__` instead.""" + return (self.main, self.conf.changes, + self.loader_cls, self.backend_cls, self.amqp_cls, + self.events_cls, self.log_cls, self.control_cls, + self.accept_magic_kwargs, self._config_source) + + @cached_property + def Worker(self): + return self.subclass_with_self('celery.apps.worker:Worker') + + @cached_property + def WorkController(self, **kwargs): + return self.subclass_with_self('celery.worker:WorkController') + + @cached_property + def Beat(self, **kwargs): + return self.subclass_with_self('celery.apps.beat:Beat') + + @cached_property + def Task(self): + return self.create_task_cls() + + @cached_property + def annotations(self): + return prepare_annotations(self.conf.CELERY_ANNOTATIONS) + + @cached_property + def AsyncResult(self): + return self.subclass_with_self('celery.result:AsyncResult') + + @cached_property + def ResultSet(self): + return self.subclass_with_self('celery.result:ResultSet') + + @cached_property + def GroupResult(self): + return self.subclass_with_self('celery.result:GroupResult') + + @cached_property + def TaskSet(self): # XXX compat + """Deprecated! Please use :class:`celery.group` instead.""" + return self.subclass_with_self('celery.task.sets:TaskSet') + + @cached_property + def TaskSetResult(self): # XXX compat + """Deprecated! 
Please use :attr:`GroupResult` instead.""" + return self.subclass_with_self('celery.result:TaskSetResult') + + @property + def pool(self): + if self._pool is None: + _ensure_after_fork() + limit = self.conf.BROKER_POOL_LIMIT + self._pool = self.connection().Pool(limit=limit) + return self._pool + + @property + def current_task(self): + return _task_stack.top + + @cached_property + def oid(self): + return oid_from(self) + + @cached_property + def amqp(self): + return instantiate(self.amqp_cls, app=self) + + @cached_property + def backend(self): + return self._get_backend() + + @cached_property + def conf(self): + return self._get_config() + + @cached_property + def control(self): + return instantiate(self.control_cls, app=self) + + @cached_property + def events(self): + return instantiate(self.events_cls, app=self) + + @cached_property + def loader(self): + return get_loader_cls(self.loader_cls)(app=self) + + @cached_property + def log(self): + return instantiate(self.log_cls, app=self) + + @cached_property + def canvas(self): + from celery import canvas + return canvas + + @cached_property + def tasks(self): + self.finalize(auto=True) + return self._tasks + + @cached_property + def timezone(self): + from celery.utils.timeutils import timezone + conf = self.conf + tz = conf.CELERY_TIMEZONE + if not tz: + return (timezone.get_timezone('UTC') if conf.CELERY_ENABLE_UTC + else timezone.local) + return timezone.get_timezone(self.conf.CELERY_TIMEZONE) +App = Celery # compat diff --git a/celery/app/builtins.py b/celery/app/builtins.py new file mode 100644 index 0000000..e42e0b2 --- /dev/null +++ b/celery/app/builtins.py @@ -0,0 +1,372 @@ +# -*- coding: utf-8 -*- +""" + celery.app.builtins + ~~~~~~~~~~~~~~~~~~~ + + Built-in tasks that are always available in all + app instances. E.g. chord, group and xmap. + +""" +from __future__ import absolute_import + +from collections import deque + +from celery._state import get_current_worker_task, connect_on_app_finalize +from celery.utils import uuid +from celery.utils.log import get_logger + +__all__ = [] + +logger = get_logger(__name__) + + +@connect_on_app_finalize +def add_backend_cleanup_task(app): + """The backend cleanup task can be used to clean up the default result + backend. + + If the configured backend requires periodic cleanup this task is also + automatically configured to run every day at midnight (requires + :program:`celery beat` to be running). + + """ + @app.task(name='celery.backend_cleanup', + shared=False, _force_evaluate=True) + def backend_cleanup(): + app.backend.cleanup() + return backend_cleanup + + +@connect_on_app_finalize +def add_unlock_chord_task(app): + """This task is used by result backends without native chord support. + + It joins chords by creating a task chain polling the header for completion. + + """ + from celery.canvas import signature + from celery.exceptions import ChordError + from celery.result import allow_join_result, result_from_tuple + + default_propagate = app.conf.CELERY_CHORD_PROPAGATES + + @app.task(name='celery.chord_unlock', max_retries=None, shared=False, + default_retry_delay=1, ignore_result=True, _force_evaluate=True) + def unlock_chord(group_id, callback, interval=None, propagate=None, + max_retries=None, result=None, + Result=app.AsyncResult, GroupResult=app.GroupResult, + result_from_tuple=result_from_tuple): + # if propagate is disabled exceptions raised by chord tasks + # will be sent as part of the result list to the chord callback. 
+ # Since 3.1 propagate will be enabled by default, and instead + # the chord callback changes state to FAILURE with the + # exception set to ChordError. + propagate = default_propagate if propagate is None else propagate + if interval is None: + interval = unlock_chord.default_retry_delay + + # check if the task group is ready, and if so apply the callback. + deps = GroupResult( + group_id, + [result_from_tuple(r, app=app) for r in result], + ) + j = deps.join_native if deps.supports_native_join else deps.join + + if deps.ready(): + callback = signature(callback, app=app) + try: + with allow_join_result(): + ret = j(timeout=3.0, propagate=propagate) + except Exception as exc: + try: + culprit = next(deps._failed_join_report()) + reason = 'Dependency {0.id} raised {1!r}'.format( + culprit, exc, + ) + except StopIteration: + reason = repr(exc) + logger.error('Chord %r raised: %r', group_id, exc, exc_info=1) + app.backend.chord_error_from_stack(callback, + ChordError(reason)) + else: + try: + callback.delay(ret) + except Exception as exc: + logger.error('Chord %r raised: %r', group_id, exc, + exc_info=1) + app.backend.chord_error_from_stack( + callback, + exc=ChordError('Callback error: {0!r}'.format(exc)), + ) + else: + raise unlock_chord.retry(countdown=interval, + max_retries=max_retries) + return unlock_chord + + +@connect_on_app_finalize +def add_map_task(app): + from celery.canvas import signature + + @app.task(name='celery.map', shared=False, _force_evaluate=True) + def xmap(task, it): + task = signature(task, app=app).type + return [task(item) for item in it] + return xmap + + +@connect_on_app_finalize +def add_starmap_task(app): + from celery.canvas import signature + + @app.task(name='celery.starmap', shared=False, _force_evaluate=True) + def xstarmap(task, it): + task = signature(task, app=app).type + return [task(*item) for item in it] + return xstarmap + + +@connect_on_app_finalize +def add_chunk_task(app): + from celery.canvas import chunks as _chunks + + @app.task(name='celery.chunks', shared=False, _force_evaluate=True) + def chunks(task, it, n): + return _chunks.apply_chunks(task, it, n) + return chunks + + +@connect_on_app_finalize +def add_group_task(app): + _app = app + from celery.canvas import maybe_signature, signature + from celery.result import result_from_tuple + + class Group(app.Task): + app = _app + name = 'celery.group' + accept_magic_kwargs = False + _decorated = True + + def run(self, tasks, result, group_id, partial_args, + add_to_parent=True): + app = self.app + result = result_from_tuple(result, app) + # any partial args are added to all tasks in the group + taskit = (signature(task, app=app).clone(partial_args) + for i, task in enumerate(tasks)) + if self.request.is_eager or app.conf.CELERY_ALWAYS_EAGER: + return app.GroupResult( + result.id, + [stask.apply(group_id=group_id) for stask in taskit], + ) + with app.producer_or_acquire() as pub: + [stask.apply_async(group_id=group_id, producer=pub, + add_to_parent=False) for stask in taskit] + parent = get_current_worker_task() + if add_to_parent and parent: + parent.add_trail(result) + return result + + def prepare(self, options, tasks, args, **kwargs): + options['group_id'] = group_id = ( + options.setdefault('task_id', uuid())) + + def prepare_member(task): + task = maybe_signature(task, app=self.app) + task.options['group_id'] = group_id + return task, task.freeze() + + try: + tasks, res = list(zip( + *[prepare_member(task) for task in tasks] + )) + except ValueError: # tasks empty + tasks, res = [], [] + 
return (tasks, self.app.GroupResult(group_id, res), group_id, args) + + def apply_async(self, partial_args=(), kwargs={}, **options): + if self.app.conf.CELERY_ALWAYS_EAGER: + return self.apply(partial_args, kwargs, **options) + tasks, result, gid, args = self.prepare( + options, args=partial_args, **kwargs + ) + super(Group, self).apply_async(( + list(tasks), result.as_tuple(), gid, args), **options + ) + return result + + def apply(self, args=(), kwargs={}, **options): + return super(Group, self).apply( + self.prepare(options, args=args, **kwargs), + **options).get() + return Group + + +@connect_on_app_finalize +def add_chain_task(app): + from celery.canvas import ( + Signature, chain, chord, group, maybe_signature, maybe_unroll_group, + ) + + _app = app + + class Chain(app.Task): + app = _app + name = 'celery.chain' + accept_magic_kwargs = False + _decorated = True + + def prepare_steps(self, args, tasks): + app = self.app + steps = deque(tasks) + next_step = prev_task = prev_res = None + tasks, results = [], [] + i = 0 + while steps: + # First task get partial args from chain. + task = maybe_signature(steps.popleft(), app=app) + task = task.clone() if i else task.clone(args) + res = task.freeze() + i += 1 + + if isinstance(task, group): + task = maybe_unroll_group(task) + if isinstance(task, chain): + # splice the chain + steps.extendleft(reversed(task.tasks)) + continue + + elif isinstance(task, group) and steps and \ + not isinstance(steps[0], group): + # automatically upgrade group(..) | s to chord(group, s) + try: + next_step = steps.popleft() + # for chords we freeze by pretending it's a normal + # task instead of a group. + res = Signature.freeze(next_step) + task = chord(task, body=next_step, task_id=res.task_id) + except IndexError: + pass # no callback, so keep as group + if prev_task: + # link previous task to this task. + prev_task.link(task) + # set the results parent attribute. + if not res.parent: + res.parent = prev_res + + if not isinstance(prev_task, chord): + results.append(res) + tasks.append(task) + prev_task, prev_res = task, res + + return tasks, results + + def apply_async(self, args=(), kwargs={}, group_id=None, chord=None, + task_id=None, link=None, link_error=None, **options): + if self.app.conf.CELERY_ALWAYS_EAGER: + return self.apply(args, kwargs, **options) + options.pop('publisher', None) + tasks, results = self.prepare_steps(args, kwargs['tasks']) + result = results[-1] + if group_id: + tasks[-1].set(group_id=group_id) + if chord: + tasks[-1].set(chord=chord) + if task_id: + tasks[-1].set(task_id=task_id) + result = tasks[-1].type.AsyncResult(task_id) + # make sure we can do a link() and link_error() on a chain object. 
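From the caller's side, the bookkeeping around this comment is what makes the following work. ``add``, ``on_done`` and ``on_error`` are hypothetical tasks, so treat this as a sketch rather than a guaranteed API:

.. code-block:: python

    from celery import chain

    res = chain(add.s(2, 2), add.s(4)).apply_async(
        link=on_done.s(),         # linked to the last task in the chain
        link_error=on_error.s(),  # attached to every task in the chain
    )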
+ if link: + tasks[-1].set(link=link) + # and if any task in the chain fails, call the errbacks + if link_error: + for task in tasks: + task.set(link_error=link_error) + tasks[0].apply_async(**options) + return result + + def apply(self, args=(), kwargs={}, signature=maybe_signature, + **options): + app = self.app + last, fargs = None, args # fargs passed to first task only + for task in kwargs['tasks']: + res = signature(task, app=app).clone(fargs).apply( + last and (last.get(), ), + ) + res.parent, last, fargs = last, res, None + return last + return Chain + + +@connect_on_app_finalize +def add_chord_task(app): + """Every chord is executed in a dedicated task, so that the chord + can be used as a signature, and this generates the task + responsible for that.""" + from celery import group + from celery.canvas import maybe_signature + _app = app + default_propagate = app.conf.CELERY_CHORD_PROPAGATES + + class Chord(app.Task): + app = _app + name = 'celery.chord' + accept_magic_kwargs = False + ignore_result = False + _decorated = True + + def run(self, header, body, partial_args=(), interval=None, + countdown=1, max_retries=None, propagate=None, + eager=False, **kwargs): + app = self.app + propagate = default_propagate if propagate is None else propagate + group_id = uuid() + + # - convert back to group if serialized + tasks = header.tasks if isinstance(header, group) else header + header = group([ + maybe_signature(s, app=app).clone() for s in tasks + ], app=self.app) + # - eager applies the group inline + if eager: + return header.apply(args=partial_args, task_id=group_id) + + body.setdefault('chord_size', len(header.tasks)) + results = header.freeze(group_id=group_id, chord=body).results + + return self.backend.apply_chord( + header, partial_args, group_id, + body, interval=interval, countdown=countdown, + max_retries=max_retries, propagate=propagate, result=results, + ) + + def apply_async(self, args=(), kwargs={}, task_id=None, + group_id=None, chord=None, **options): + app = self.app + if app.conf.CELERY_ALWAYS_EAGER: + return self.apply(args, kwargs, **options) + header = kwargs.pop('header') + body = kwargs.pop('body') + header, body = (maybe_signature(header, app=app), + maybe_signature(body, app=app)) + # forward certain options to body + if chord is not None: + body.options['chord'] = chord + if group_id is not None: + body.options['group_id'] = group_id + [body.link(s) for s in options.pop('link', [])] + [body.link_error(s) for s in options.pop('link_error', [])] + body_result = body.freeze(task_id) + parent = super(Chord, self).apply_async((header, body, args), + kwargs, **options) + body_result.parent = parent + return body_result + + def apply(self, args=(), kwargs={}, propagate=True, **options): + body = kwargs['body'] + res = super(Chord, self).apply(args, dict(kwargs, eager=True), + **options) + return maybe_signature(body, app=self.app).apply( + args=(res.get(propagate=propagate).get(), )) + return Chord diff --git a/celery/app/control.py b/celery/app/control.py new file mode 100644 index 0000000..2845374 --- /dev/null +++ b/celery/app/control.py @@ -0,0 +1,308 @@ +# -*- coding: utf-8 -*- +""" + celery.app.control + ~~~~~~~~~~~~~~~~~~~ + + Client for worker remote control commands. + Server implementation is in :mod:`celery.worker.control`. 
+ +""" +from __future__ import absolute_import + +import warnings + +from kombu.pidbox import Mailbox +from kombu.utils import cached_property + +from celery.exceptions import DuplicateNodenameWarning +from celery.utils.text import pluralize + +__all__ = ['Inspect', 'Control', 'flatten_reply'] + +W_DUPNODE = """\ +Received multiple replies from node name: {0!r}. +Please make sure you give each node a unique nodename using the `-n` option.\ +""" + + +def flatten_reply(reply): + nodes, dupes = {}, set() + for item in reply: + [dupes.add(name) for name in item if name in nodes] + nodes.update(item) + if dupes: + warnings.warn(DuplicateNodenameWarning( + W_DUPNODE.format( + pluralize(len(dupes), 'name'), ', '.join(sorted(dupes)), + ), + )) + return nodes + + +class Inspect(object): + app = None + + def __init__(self, destination=None, timeout=1, callback=None, + connection=None, app=None, limit=None): + self.app = app or self.app + self.destination = destination + self.timeout = timeout + self.callback = callback + self.connection = connection + self.limit = limit + + def _prepare(self, reply): + if not reply: + return + by_node = flatten_reply(reply) + if self.destination and \ + not isinstance(self.destination, (list, tuple)): + return by_node.get(self.destination) + return by_node + + def _request(self, command, **kwargs): + return self._prepare(self.app.control.broadcast( + command, + arguments=kwargs, + destination=self.destination, + callback=self.callback, + connection=self.connection, + limit=self.limit, + timeout=self.timeout, reply=True, + )) + + def report(self): + return self._request('report') + + def clock(self): + return self._request('clock') + + def active(self, safe=False): + return self._request('dump_active', safe=safe) + + def scheduled(self, safe=False): + return self._request('dump_schedule', safe=safe) + + def reserved(self, safe=False): + return self._request('dump_reserved', safe=safe) + + def stats(self): + return self._request('stats') + + def revoked(self): + return self._request('dump_revoked') + + def registered(self, *taskinfoitems): + return self._request('dump_tasks', taskinfoitems=taskinfoitems) + registered_tasks = registered + + def ping(self): + return self._request('ping') + + def active_queues(self): + return self._request('active_queues') + + def query_task(self, ids): + return self._request('query_task', ids=ids) + + def conf(self, with_defaults=False): + return self._request('dump_conf', with_defaults=with_defaults) + + def hello(self, from_node, revoked=None): + return self._request('hello', from_node=from_node, revoked=revoked) + + def memsample(self): + return self._request('memsample') + + def memdump(self, samples=10): + return self._request('memdump', samples=samples) + + def objgraph(self, type='Request', n=200, max_depth=10): + return self._request('objgraph', num=n, max_depth=max_depth, type=type) + + +class Control(object): + Mailbox = Mailbox + + def __init__(self, app=None): + self.app = app + self.mailbox = self.Mailbox('celery', type='fanout', accept=['json']) + + @cached_property + def inspect(self): + return self.app.subclass_with_self(Inspect, reverse='control.inspect') + + def purge(self, connection=None): + """Discard all waiting tasks. + + This will ignore all tasks waiting for execution, and they will + be deleted from the messaging server. + + :returns: the number of tasks discarded. 
+ + """ + with self.app.connection_or_acquire(connection) as conn: + return self.app.amqp.TaskConsumer(conn).purge() + discard_all = purge + + def election(self, id, topic, action=None, connection=None): + self.broadcast('election', connection=connection, arguments={ + 'id': id, 'topic': topic, 'action': action, + }) + + def revoke(self, task_id, destination=None, terminate=False, + signal='SIGTERM', **kwargs): + """Tell all (or specific) workers to revoke a task by id. + + If a task is revoked, the workers will ignore the task and + not execute it after all. + + :param task_id: Id of the task to revoke. + :keyword terminate: Also terminate the process currently working + on the task (if any). + :keyword signal: Name of signal to send to process if terminate. + Default is TERM. + + See :meth:`broadcast` for supported keyword arguments. + + """ + return self.broadcast('revoke', destination=destination, + arguments={'task_id': task_id, + 'terminate': terminate, + 'signal': signal}, **kwargs) + + def ping(self, destination=None, timeout=1, **kwargs): + """Ping all (or specific) workers. + + Will return the list of answers. + + See :meth:`broadcast` for supported keyword arguments. + + """ + return self.broadcast('ping', reply=True, destination=destination, + timeout=timeout, **kwargs) + + def rate_limit(self, task_name, rate_limit, destination=None, **kwargs): + """Tell all (or specific) workers to set a new rate limit + for task by type. + + :param task_name: Name of task to change rate limit for. + :param rate_limit: The rate limit as tasks per second, or a rate limit + string (`'100/m'`, etc. + see :attr:`celery.task.base.Task.rate_limit` for + more information). + + See :meth:`broadcast` for supported keyword arguments. + + """ + return self.broadcast('rate_limit', destination=destination, + arguments={'task_name': task_name, + 'rate_limit': rate_limit}, + **kwargs) + + def add_consumer(self, queue, exchange=None, exchange_type='direct', + routing_key=None, options=None, **kwargs): + """Tell all (or specific) workers to start consuming from a new queue. + + Only the queue name is required as if only the queue is specified + then the exchange/routing key will be set to the same name ( + like automatic queues do). + + .. note:: + + This command does not respect the default queue/exchange + options in the configuration. + + :param queue: Name of queue to start consuming from. + :keyword exchange: Optional name of exchange. + :keyword exchange_type: Type of exchange (defaults to 'direct') + command to, when empty broadcast to all workers. + :keyword routing_key: Optional routing key. + :keyword options: Additional options as supported + by :meth:`kombu.entitiy.Queue.from_dict`. + + See :meth:`broadcast` for supported keyword arguments. + + """ + return self.broadcast( + 'add_consumer', + arguments=dict({'queue': queue, 'exchange': exchange, + 'exchange_type': exchange_type, + 'routing_key': routing_key}, **options or {}), + **kwargs + ) + + def cancel_consumer(self, queue, **kwargs): + """Tell all (or specific) workers to stop consuming from ``queue``. + + Supports the same keyword arguments as :meth:`broadcast`. + + """ + return self.broadcast( + 'cancel_consumer', arguments={'queue': queue}, **kwargs + ) + + def time_limit(self, task_name, soft=None, hard=None, **kwargs): + """Tell all (or specific) workers to set time limits for + a task by type. + + :param task_name: Name of task to change time limits for. + :keyword soft: New soft time limit (in seconds). 
+ :keyword hard: New hard time limit (in seconds). + + Any additional keyword arguments are passed on to :meth:`broadcast`. + + """ + return self.broadcast( + 'time_limit', + arguments={'task_name': task_name, + 'hard': hard, 'soft': soft}, **kwargs) + + def enable_events(self, destination=None, **kwargs): + """Tell all (or specific) workers to enable events.""" + return self.broadcast('enable_events', {}, destination, **kwargs) + + def disable_events(self, destination=None, **kwargs): + """Tell all (or specific) workers to enable events.""" + return self.broadcast('disable_events', {}, destination, **kwargs) + + def pool_grow(self, n=1, destination=None, **kwargs): + """Tell all (or specific) workers to grow the pool by ``n``. + + Supports the same arguments as :meth:`broadcast`. + + """ + return self.broadcast('pool_grow', {'n': n}, destination, **kwargs) + + def pool_shrink(self, n=1, destination=None, **kwargs): + """Tell all (or specific) workers to shrink the pool by ``n``. + + Supports the same arguments as :meth:`broadcast`. + + """ + return self.broadcast('pool_shrink', {'n': n}, destination, **kwargs) + + def broadcast(self, command, arguments=None, destination=None, + connection=None, reply=False, timeout=1, limit=None, + callback=None, channel=None, **extra_kwargs): + """Broadcast a control command to the celery workers. + + :param command: Name of command to send. + :param arguments: Keyword arguments for the command. + :keyword destination: If set, a list of the hosts to send the + command to, when empty broadcast to all workers. + :keyword connection: Custom broker connection to use, if not set, + a connection will be established automatically. + :keyword reply: Wait for and return the reply. + :keyword timeout: Timeout in seconds to wait for the reply. + :keyword limit: Limit number of replies. + :keyword callback: Callback called immediately for each reply + received. + + """ + with self.app.connection_or_acquire(connection) as conn: + arguments = dict(arguments or {}, **extra_kwargs) + return self.mailbox(conn)._broadcast( + command, arguments, destination, reply, timeout, + limit, callback, channel=channel, + ) diff --git a/celery/app/defaults.py b/celery/app/defaults.py new file mode 100644 index 0000000..15f7fcf --- /dev/null +++ b/celery/app/defaults.py @@ -0,0 +1,269 @@ +# -*- coding: utf-8 -*- +""" + celery.app.defaults + ~~~~~~~~~~~~~~~~~~~ + + Configuration introspection and defaults. 
+ +""" +from __future__ import absolute_import + +import sys + +from collections import deque, namedtuple +from datetime import timedelta + +from celery.five import items +from celery.utils import strtobool +from celery.utils.functional import memoize + +__all__ = ['Option', 'NAMESPACES', 'flatten', 'find'] + +is_jython = sys.platform.startswith('java') +is_pypy = hasattr(sys, 'pypy_version_info') + +DEFAULT_POOL = 'prefork' +if is_jython: + DEFAULT_POOL = 'threads' +elif is_pypy: + if sys.pypy_version_info[0:3] < (1, 5, 0): + DEFAULT_POOL = 'solo' + else: + DEFAULT_POOL = 'prefork' + +DEFAULT_ACCEPT_CONTENT = ['json', 'pickle', 'msgpack', 'yaml'] +DEFAULT_PROCESS_LOG_FMT = """ + [%(asctime)s: %(levelname)s/%(processName)s] %(message)s +""".strip() +DEFAULT_LOG_FMT = '[%(asctime)s: %(levelname)s] %(message)s' +DEFAULT_TASK_LOG_FMT = """[%(asctime)s: %(levelname)s/%(processName)s] \ +%(task_name)s[%(task_id)s]: %(message)s""" + +_BROKER_OLD = {'deprecate_by': '2.5', 'remove_by': '4.0', + 'alt': 'BROKER_URL setting'} +_REDIS_OLD = {'deprecate_by': '2.5', 'remove_by': '4.0', + 'alt': 'URL form of CELERY_RESULT_BACKEND'} + +searchresult = namedtuple('searchresult', ('namespace', 'key', 'type')) + + +class Option(object): + alt = None + deprecate_by = None + remove_by = None + typemap = dict(string=str, int=int, float=float, any=lambda v: v, + bool=strtobool, dict=dict, tuple=tuple) + + def __init__(self, default=None, *args, **kwargs): + self.default = default + self.type = kwargs.get('type') or 'string' + for attr, value in items(kwargs): + setattr(self, attr, value) + + def to_python(self, value): + return self.typemap[self.type](value) + + def __repr__(self): + return '{0} default->{1!r}>'.format(self.type, + self.default) + +NAMESPACES = { + 'BROKER': { + 'URL': Option(None, type='string'), + 'CONNECTION_TIMEOUT': Option(4, type='float'), + 'CONNECTION_RETRY': Option(True, type='bool'), + 'CONNECTION_MAX_RETRIES': Option(100, type='int'), + 'FAILOVER_STRATEGY': Option(None, type='string'), + 'HEARTBEAT': Option(None, type='int'), + 'HEARTBEAT_CHECKRATE': Option(3.0, type='int'), + 'LOGIN_METHOD': Option(None, type='string'), + 'POOL_LIMIT': Option(10, type='int'), + 'USE_SSL': Option(False, type='bool'), + 'TRANSPORT': Option(type='string'), + 'TRANSPORT_OPTIONS': Option({}, type='dict'), + 'HOST': Option(type='string', **_BROKER_OLD), + 'PORT': Option(type='int', **_BROKER_OLD), + 'USER': Option(type='string', **_BROKER_OLD), + 'PASSWORD': Option(type='string', **_BROKER_OLD), + 'VHOST': Option(type='string', **_BROKER_OLD), + }, + 'CASSANDRA': { + 'COLUMN_FAMILY': Option(type='string'), + 'DETAILED_MODE': Option(False, type='bool'), + 'KEYSPACE': Option(type='string'), + 'READ_CONSISTENCY': Option(type='string'), + 'SERVERS': Option(type='list'), + 'WRITE_CONSISTENCY': Option(type='string'), + }, + 'CELERY': { + 'ACCEPT_CONTENT': Option(DEFAULT_ACCEPT_CONTENT, type='list'), + 'ACKS_LATE': Option(False, type='bool'), + 'ALWAYS_EAGER': Option(False, type='bool'), + 'ANNOTATIONS': Option(type='any'), + 'BROADCAST_QUEUE': Option('celeryctl'), + 'BROADCAST_EXCHANGE': Option('celeryctl'), + 'BROADCAST_EXCHANGE_TYPE': Option('fanout'), + 'CACHE_BACKEND': Option(), + 'CACHE_BACKEND_OPTIONS': Option({}, type='dict'), + 'CHORD_PROPAGATES': Option(True, type='bool'), + 'COUCHBASE_BACKEND_SETTINGS': Option(None, type='dict'), + 'CREATE_MISSING_QUEUES': Option(True, type='bool'), + 'DEFAULT_RATE_LIMIT': Option(type='string'), + 'DISABLE_RATE_LIMITS': Option(False, type='bool'), + 
'DEFAULT_ROUTING_KEY': Option('celery'), + 'DEFAULT_QUEUE': Option('celery'), + 'DEFAULT_EXCHANGE': Option('celery'), + 'DEFAULT_EXCHANGE_TYPE': Option('direct'), + 'DEFAULT_DELIVERY_MODE': Option(2, type='string'), + 'EAGER_PROPAGATES_EXCEPTIONS': Option(False, type='bool'), + 'ENABLE_UTC': Option(True, type='bool'), + 'ENABLE_REMOTE_CONTROL': Option(True, type='bool'), + 'EVENT_SERIALIZER': Option('json'), + 'EVENT_QUEUE_EXPIRES': Option(None, type='float'), + 'EVENT_QUEUE_TTL': Option(None, type='float'), + 'IMPORTS': Option((), type='tuple'), + 'INCLUDE': Option((), type='tuple'), + 'IGNORE_RESULT': Option(False, type='bool'), + 'MAX_CACHED_RESULTS': Option(100, type='int'), + 'MESSAGE_COMPRESSION': Option(type='string'), + 'MONGODB_BACKEND_SETTINGS': Option(type='dict'), + 'REDIS_HOST': Option(type='string', **_REDIS_OLD), + 'REDIS_PORT': Option(type='int', **_REDIS_OLD), + 'REDIS_DB': Option(type='int', **_REDIS_OLD), + 'REDIS_PASSWORD': Option(type='string', **_REDIS_OLD), + 'REDIS_MAX_CONNECTIONS': Option(type='int'), + 'RESULT_BACKEND': Option(type='string'), + 'RESULT_DB_SHORT_LIVED_SESSIONS': Option(False, type='bool'), + 'RESULT_DB_TABLENAMES': Option(type='dict'), + 'RESULT_DBURI': Option(), + 'RESULT_ENGINE_OPTIONS': Option(type='dict'), + 'RESULT_EXCHANGE': Option('celeryresults'), + 'RESULT_EXCHANGE_TYPE': Option('direct'), + 'RESULT_SERIALIZER': Option('pickle'), + 'RESULT_PERSISTENT': Option(None, type='bool'), + 'ROUTES': Option(type='any'), + 'SEND_EVENTS': Option(False, type='bool'), + 'SEND_TASK_ERROR_EMAILS': Option(False, type='bool'), + 'SEND_TASK_SENT_EVENT': Option(False, type='bool'), + 'STORE_ERRORS_EVEN_IF_IGNORED': Option(False, type='bool'), + 'TASK_PUBLISH_RETRY': Option(True, type='bool'), + 'TASK_PUBLISH_RETRY_POLICY': Option({ + 'max_retries': 3, + 'interval_start': 0, + 'interval_max': 1, + 'interval_step': 0.2}, type='dict'), + 'TASK_RESULT_EXPIRES': Option(timedelta(days=1), type='float'), + 'TASK_SERIALIZER': Option('pickle'), + 'TIMEZONE': Option(type='string'), + 'TRACK_STARTED': Option(False, type='bool'), + 'REDIRECT_STDOUTS': Option(True, type='bool'), + 'REDIRECT_STDOUTS_LEVEL': Option('WARNING'), + 'QUEUES': Option(type='dict'), + 'QUEUE_HA_POLICY': Option(None, type='string'), + 'SECURITY_KEY': Option(type='string'), + 'SECURITY_CERTIFICATE': Option(type='string'), + 'SECURITY_CERT_STORE': Option(type='string'), + 'WORKER_DIRECT': Option(False, type='bool'), + }, + 'CELERYD': { + 'AGENT': Option(None, type='string'), + 'AUTOSCALER': Option('celery.worker.autoscale:Autoscaler'), + 'AUTORELOADER': Option('celery.worker.autoreload:Autoreloader'), + 'CONCURRENCY': Option(0, type='int'), + 'TIMER': Option(type='string'), + 'TIMER_PRECISION': Option(1.0, type='float'), + 'FORCE_EXECV': Option(False, type='bool'), + 'HIJACK_ROOT_LOGGER': Option(True, type='bool'), + 'CONSUMER': Option('celery.worker.consumer:Consumer', type='string'), + 'LOG_FORMAT': Option(DEFAULT_PROCESS_LOG_FMT), + 'LOG_COLOR': Option(type='bool'), + 'LOG_LEVEL': Option('WARN', deprecate_by='2.4', remove_by='4.0', + alt='--loglevel argument'), + 'LOG_FILE': Option(deprecate_by='2.4', remove_by='4.0', + alt='--logfile argument'), + 'MAX_TASKS_PER_CHILD': Option(type='int'), + 'POOL': Option(DEFAULT_POOL), + 'POOL_PUTLOCKS': Option(True, type='bool'), + 'POOL_RESTARTS': Option(False, type='bool'), + 'PREFETCH_MULTIPLIER': Option(4, type='int'), + 'STATE_DB': Option(), + 'TASK_LOG_FORMAT': Option(DEFAULT_TASK_LOG_FMT), + 'TASK_SOFT_TIME_LIMIT': Option(type='float'), + 
'TASK_TIME_LIMIT': Option(type='float'), + 'WORKER_LOST_WAIT': Option(10.0, type='float') + }, + 'CELERYBEAT': { + 'SCHEDULE': Option({}, type='dict'), + 'SCHEDULER': Option('celery.beat:PersistentScheduler'), + 'SCHEDULE_FILENAME': Option('celerybeat-schedule'), + 'SYNC_EVERY': Option(0, type='int'), + 'MAX_LOOP_INTERVAL': Option(0, type='float'), + 'LOG_LEVEL': Option('INFO', deprecate_by='2.4', remove_by='4.0', + alt='--loglevel argument'), + 'LOG_FILE': Option(deprecate_by='2.4', remove_by='4.0', + alt='--logfile argument'), + }, + 'CELERYMON': { + 'LOG_LEVEL': Option('INFO', deprecate_by='2.4', remove_by='4.0', + alt='--loglevel argument'), + 'LOG_FILE': Option(deprecate_by='2.4', remove_by='4.0', + alt='--logfile argument'), + 'LOG_FORMAT': Option(DEFAULT_LOG_FMT), + }, + 'EMAIL': { + 'HOST': Option('localhost'), + 'PORT': Option(25, type='int'), + 'HOST_USER': Option(), + 'HOST_PASSWORD': Option(), + 'TIMEOUT': Option(2, type='float'), + 'USE_SSL': Option(False, type='bool'), + 'USE_TLS': Option(False, type='bool'), + }, + 'SERVER_EMAIL': Option('celery@localhost'), + 'ADMINS': Option((), type='tuple'), +} + + +def flatten(d, ns=''): + stack = deque([(ns, d)]) + while stack: + name, space = stack.popleft() + for key, value in items(space): + if isinstance(value, dict): + stack.append((name + key + '_', value)) + else: + yield name + key, value +DEFAULTS = dict((key, value.default) for key, value in flatten(NAMESPACES)) + + +def find_deprecated_settings(source): + from celery.utils import warn_deprecated + for name, opt in flatten(NAMESPACES): + if (opt.deprecate_by or opt.remove_by) and getattr(source, name, None): + warn_deprecated(description='The {0!r} setting'.format(name), + deprecation=opt.deprecate_by, + removal=opt.remove_by, + alternative='Use the {0.alt} instead'.format(opt)) + return source + + +@memoize(maxsize=None) +def find(name, namespace='celery'): + # - Try specified namespace first. + namespace = namespace.upper() + try: + return searchresult( + namespace, name.upper(), NAMESPACES[namespace][name.upper()], + ) + except KeyError: + # - Try all the other namespaces. + for ns, keys in items(NAMESPACES): + if ns.upper() == name.upper(): + return searchresult(None, ns, keys) + elif isinstance(keys, dict): + try: + return searchresult(ns, name.upper(), keys[name.upper()]) + except KeyError: + pass + # - See if name is a qualname last. + return searchresult(None, name.upper(), DEFAULTS[name.upper()]) diff --git a/celery/app/log.py b/celery/app/log.py new file mode 100644 index 0000000..3d350e9 --- /dev/null +++ b/celery/app/log.py @@ -0,0 +1,257 @@ +# -*- coding: utf-8 -*- +""" + celery.app.log + ~~~~~~~~~~~~~~ + + The Celery instances logging section: ``Celery.log``. + + Sets up logging for the worker and other programs, + redirects stdouts, colors log output, patches logging + related compatibility fixes, and so on. 
+ +""" +from __future__ import absolute_import + +import logging +import os +import sys + +from logging.handlers import WatchedFileHandler + +from kombu.log import NullHandler +from kombu.utils.encoding import set_default_encoding_file + +from celery import signals +from celery._state import get_current_task +from celery.five import class_property, string_t +from celery.utils import isatty, node_format +from celery.utils.log import ( + get_logger, mlevel, + ColorFormatter, ensure_process_aware_logger, + LoggingProxy, get_multiprocessing_logger, + reset_multiprocessing_logger, +) +from celery.utils.term import colored + +__all__ = ['TaskFormatter', 'Logging'] + +MP_LOG = os.environ.get('MP_LOG', False) + + +class TaskFormatter(ColorFormatter): + + def format(self, record): + task = get_current_task() + if task and task.request: + record.__dict__.update(task_id=task.request.id, + task_name=task.name) + else: + record.__dict__.setdefault('task_name', '???') + record.__dict__.setdefault('task_id', '???') + return ColorFormatter.format(self, record) + + +class Logging(object): + #: The logging subsystem is only configured once per process. + #: setup_logging_subsystem sets this flag, and subsequent calls + #: will do nothing. + _setup = False + + def __init__(self, app): + self.app = app + self.loglevel = mlevel(self.app.conf.CELERYD_LOG_LEVEL) + self.format = self.app.conf.CELERYD_LOG_FORMAT + self.task_format = self.app.conf.CELERYD_TASK_LOG_FORMAT + self.colorize = self.app.conf.CELERYD_LOG_COLOR + + def setup(self, loglevel=None, logfile=None, redirect_stdouts=False, + redirect_level='WARNING', colorize=None, hostname=None): + handled = self.setup_logging_subsystem( + loglevel, logfile, colorize=colorize, hostname=hostname, + ) + if not handled: + if redirect_stdouts: + self.redirect_stdouts(redirect_level) + os.environ.update( + CELERY_LOG_LEVEL=str(loglevel) if loglevel else '', + CELERY_LOG_FILE=str(logfile) if logfile else '', + ) + return handled + + def redirect_stdouts(self, loglevel=None, name='celery.redirected'): + self.redirect_stdouts_to_logger( + get_logger(name), loglevel=loglevel + ) + os.environ.update( + CELERY_LOG_REDIRECT='1', + CELERY_LOG_REDIRECT_LEVEL=str(loglevel or ''), + ) + + def setup_logging_subsystem(self, loglevel=None, logfile=None, format=None, + colorize=None, hostname=None, **kwargs): + if self.already_setup: + return + if logfile and hostname: + logfile = node_format(logfile, hostname) + self.already_setup = True + loglevel = mlevel(loglevel or self.loglevel) + format = format or self.format + colorize = self.supports_color(colorize, logfile) + reset_multiprocessing_logger() + ensure_process_aware_logger() + receivers = signals.setup_logging.send( + sender=None, loglevel=loglevel, logfile=logfile, + format=format, colorize=colorize, + ) + + if not receivers: + root = logging.getLogger() + + if self.app.conf.CELERYD_HIJACK_ROOT_LOGGER: + root.handlers = [] + get_logger('celery').handlers = [] + get_logger('celery.task').handlers = [] + get_logger('celery.redirected').handlers = [] + + # Configure root logger + self._configure_logger( + root, logfile, loglevel, format, colorize, **kwargs + ) + + # Configure the multiprocessing logger + self._configure_logger( + get_multiprocessing_logger(), + logfile, loglevel if MP_LOG else logging.ERROR, + format, colorize, **kwargs + ) + + signals.after_setup_logger.send( + sender=None, logger=root, + loglevel=loglevel, logfile=logfile, + format=format, colorize=colorize, + ) + + # then setup the root task logger. 
+ self.setup_task_loggers(loglevel, logfile, colorize=colorize) + + try: + stream = logging.getLogger().handlers[0].stream + except (AttributeError, IndexError): + pass + else: + set_default_encoding_file(stream) + + # This is a hack for multiprocessing's fork+exec, so that + # logging before Process.run works. + logfile_name = logfile if isinstance(logfile, string_t) else '' + os.environ.update(_MP_FORK_LOGLEVEL_=str(loglevel), + _MP_FORK_LOGFILE_=logfile_name, + _MP_FORK_LOGFORMAT_=format) + return receivers + + def _configure_logger(self, logger, logfile, loglevel, + format, colorize, **kwargs): + if logger is not None: + self.setup_handlers(logger, logfile, format, + colorize, **kwargs) + if loglevel: + logger.setLevel(loglevel) + + def setup_task_loggers(self, loglevel=None, logfile=None, format=None, + colorize=None, propagate=False, **kwargs): + """Setup the task logger. + + If `logfile` is not specified, then `sys.stderr` is used. + + Will return the base task logger object. + + """ + loglevel = mlevel(loglevel or self.loglevel) + format = format or self.task_format + colorize = self.supports_color(colorize, logfile) + + logger = self.setup_handlers( + get_logger('celery.task'), + logfile, format, colorize, + formatter=TaskFormatter, **kwargs + ) + logger.setLevel(loglevel) + # this is an int for some reason, better not question why. + logger.propagate = int(propagate) + signals.after_setup_task_logger.send( + sender=None, logger=logger, + loglevel=loglevel, logfile=logfile, + format=format, colorize=colorize, + ) + return logger + + def redirect_stdouts_to_logger(self, logger, loglevel=None, + stdout=True, stderr=True): + """Redirect :class:`sys.stdout` and :class:`sys.stderr` to a + logging instance. + + :param logger: The :class:`logging.Logger` instance to redirect to. + :param loglevel: The loglevel redirected messages will be logged as. + + """ + proxy = LoggingProxy(logger, loglevel) + if stdout: + sys.stdout = proxy + if stderr: + sys.stderr = proxy + return proxy + + def supports_color(self, colorize=None, logfile=None): + colorize = self.colorize if colorize is None else colorize + if self.app.IS_WINDOWS: + # Windows does not support ANSI color codes. + return False + if colorize or colorize is None: + # Only use color if there is no active log file + # and stderr is an actual terminal. 
+ return logfile is None and isatty(sys.stderr) + return colorize + + def colored(self, logfile=None, enabled=None): + return colored(enabled=self.supports_color(enabled, logfile)) + + def setup_handlers(self, logger, logfile, format, colorize, + formatter=ColorFormatter, **kwargs): + if self._is_configured(logger): + return logger + handler = self._detect_handler(logfile) + handler.setFormatter(formatter(format, use_color=colorize)) + logger.addHandler(handler) + return logger + + def _detect_handler(self, logfile=None): + """Create log handler with either a filename, an open stream + or :const:`None` (stderr).""" + logfile = sys.__stderr__ if logfile is None else logfile + if hasattr(logfile, 'write'): + return logging.StreamHandler(logfile) + return WatchedFileHandler(logfile) + + def _has_handler(self, logger): + if logger.handlers: + return any(not isinstance(h, NullHandler) for h in logger.handlers) + + def _is_configured(self, logger): + return self._has_handler(logger) and not getattr( + logger, '_rudimentary_setup', False) + + def setup_logger(self, name='celery', *args, **kwargs): + """Deprecated: No longer used.""" + self.setup_logging_subsystem(*args, **kwargs) + return logging.root + + def get_default_logger(self, name='celery', **kwargs): + return get_logger(name) + + @class_property + def already_setup(cls): + return cls._setup + + @already_setup.setter # noqa + def already_setup(cls, was_setup): + cls._setup = was_setup diff --git a/celery/app/registry.py b/celery/app/registry.py new file mode 100644 index 0000000..7046554 --- /dev/null +++ b/celery/app/registry.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +""" + celery.app.registry + ~~~~~~~~~~~~~~~~~~~ + + Registry of available tasks. + +""" +from __future__ import absolute_import + +import inspect + +from importlib import import_module + +from celery._state import get_current_app +from celery.exceptions import NotRegistered +from celery.five import items + +__all__ = ['TaskRegistry'] + + +class TaskRegistry(dict): + NotRegistered = NotRegistered + + def __missing__(self, key): + raise self.NotRegistered(key) + + def register(self, task): + """Register a task in the task registry. + + The task will be automatically instantiated if not already an + instance. + + """ + self[task.name] = inspect.isclass(task) and task() or task + + def unregister(self, name): + """Unregister task by name. + + :param name: name of the task to unregister, or a + :class:`celery.task.base.Task` with a valid `name` attribute. + + :raises celery.exceptions.NotRegistered: if the task has not + been registered. + + """ + try: + self.pop(getattr(name, 'name', name)) + except KeyError: + raise self.NotRegistered(name) + + # -- these methods are irrelevant now and will be removed in 4.0 + def regular(self): + return self.filter_types('regular') + + def periodic(self): + return self.filter_types('periodic') + + def filter_types(self, type): + return dict((name, task) for name, task in items(self) + if getattr(task, 'type', 'regular') == type) + + +def _unpickle_task(name): + return get_current_app().tasks[name] + + +def _unpickle_task_v2(name, module=None): + if module: + import_module(module) + return get_current_app().tasks[name] diff --git a/celery/app/routes.py b/celery/app/routes.py new file mode 100644 index 0000000..d654f9d --- /dev/null +++ b/celery/app/routes.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +""" + celery.routes + ~~~~~~~~~~~~~ + + Contains utilities for working with task routers, + (:setting:`CELERY_ROUTES`). 
+ +""" +from __future__ import absolute_import + +from celery.exceptions import QueueNotFound +from celery.five import string_t +from celery.utils import lpmerge +from celery.utils.functional import firstmethod, mlazy +from celery.utils.imports import instantiate + +__all__ = ['MapRoute', 'Router', 'prepare'] + +_first_route = firstmethod('route_for_task') + + +class MapRoute(object): + """Creates a router out of a :class:`dict`.""" + + def __init__(self, map): + self.map = map + + def route_for_task(self, task, *args, **kwargs): + try: + return dict(self.map[task]) + except KeyError: + pass + + +class Router(object): + + def __init__(self, routes=None, queues=None, + create_missing=False, app=None): + self.app = app + self.queues = {} if queues is None else queues + self.routes = [] if routes is None else routes + self.create_missing = create_missing + + def route(self, options, task, args=(), kwargs={}): + options = self.expand_destination(options) # expands 'queue' + if self.routes: + route = self.lookup_route(task, args, kwargs) + if route: # expands 'queue' in route. + return lpmerge(self.expand_destination(route), options) + if 'queue' not in options: + options = lpmerge(self.expand_destination( + self.app.conf.CELERY_DEFAULT_QUEUE), options) + return options + + def expand_destination(self, route): + # Route can be a queue name: convenient for direct exchanges. + if isinstance(route, string_t): + queue, route = route, {} + else: + # can use defaults from configured queue, but override specific + # things (like the routing_key): great for topic exchanges. + queue = route.pop('queue', None) + + if queue: + try: + Q = self.queues[queue] # noqa + except KeyError: + raise QueueNotFound( + 'Queue {0!r} missing from CELERY_QUEUES'.format(queue)) + # needs to be declared by publisher + route['queue'] = Q + return route + + def lookup_route(self, task, args=None, kwargs=None): + return _first_route(self.routes, task, args, kwargs) + + +def prepare(routes): + """Expands the :setting:`CELERY_ROUTES` setting.""" + + def expand_route(route): + if isinstance(route, dict): + return MapRoute(route) + if isinstance(route, string_t): + return mlazy(instantiate, route) + return route + + if routes is None: + return () + if not isinstance(routes, (list, tuple)): + routes = (routes, ) + return [expand_route(route) for route in routes] diff --git a/celery/app/task.py b/celery/app/task.py new file mode 100644 index 0000000..1417af6 --- /dev/null +++ b/celery/app/task.py @@ -0,0 +1,922 @@ +# -*- coding: utf-8 -*- +""" + celery.app.task + ~~~~~~~~~~~~~~~ + + Task Implementation: Task request context, and the base task class. 
+ +""" +from __future__ import absolute_import + +import sys + +from billiard.einfo import ExceptionInfo + +from celery import current_app +from celery import states +from celery._state import _task_stack +from celery.canvas import signature +from celery.exceptions import MaxRetriesExceededError, Reject, Retry +from celery.five import class_property, items, with_metaclass +from celery.local import Proxy +from celery.result import EagerResult +from celery.utils import gen_task_name, fun_takes_kwargs, uuid, maybe_reraise +from celery.utils.functional import mattrgetter, maybe_list +from celery.utils.imports import instantiate +from celery.utils.mail import ErrorMail + +from .annotations import resolve_all as resolve_all_annotations +from .registry import _unpickle_task_v2 +from .utils import appstr + +__all__ = ['Context', 'Task'] + +#: extracts attributes related to publishing a message from an object. +extract_exec_options = mattrgetter( + 'queue', 'routing_key', 'exchange', 'priority', 'expires', + 'serializer', 'delivery_mode', 'compression', 'time_limit', + 'soft_time_limit', 'immediate', 'mandatory', # imm+man is deprecated +) + +# We take __repr__ very seriously around here ;) +R_BOUND_TASK = '' +R_UNBOUND_TASK = '' +R_SELF_TASK = '<@task {0.name} bound to other {0.__self__}>' +R_INSTANCE = '<@task: {0.name} of {app}{flags}>' + + +class _CompatShared(object): + + def __init__(self, name, cons): + self.name = name + self.cons = cons + + def __hash__(self): + return hash(self.name) + + def __repr__(self): + return '' % (self.name, ) + + def __call__(self, app): + return self.cons(app) + + +def _strflags(flags, default=''): + if flags: + return ' ({0})'.format(', '.join(flags)) + return default + + +def _reprtask(task, fmt=None, flags=None): + flags = list(flags) if flags is not None else [] + flags.append('v2 compatible') if task.__v2_compat__ else None + if not fmt: + fmt = R_BOUND_TASK if task._app else R_UNBOUND_TASK + return fmt.format( + task, flags=_strflags(flags), + app=appstr(task._app) if task._app else None, + ) + + +class Context(object): + # Default context + logfile = None + loglevel = None + hostname = None + id = None + args = None + kwargs = None + retries = 0 + eta = None + expires = None + is_eager = False + headers = None + delivery_info = None + reply_to = None + correlation_id = None + taskset = None # compat alias to group + group = None + chord = None + utc = None + called_directly = True + callbacks = None + errbacks = None + timelimit = None + _children = None # see property + _protected = 0 + + def __init__(self, *args, **kwargs): + self.update(*args, **kwargs) + + def update(self, *args, **kwargs): + return self.__dict__.update(*args, **kwargs) + + def clear(self): + return self.__dict__.clear() + + def get(self, key, default=None): + return getattr(self, key, default) + + def __repr__(self): + return ''.format(vars(self)) + + @property + def children(self): + # children must be an empy list for every thread + if self._children is None: + self._children = [] + return self._children + + +class TaskType(type): + """Meta class for tasks. + + Automatically registers the task in the task registry (except + if the :attr:`Task.abstract`` attribute is set). + + If no :attr:`Task.name` attribute is provided, then the name is generated + from the module and class name. 
+ + """ + _creation_count = {} # used by old non-abstract task classes + + def __new__(cls, name, bases, attrs): + new = super(TaskType, cls).__new__ + task_module = attrs.get('__module__') or '__main__' + + # - Abstract class: abstract attribute should not be inherited. + abstract = attrs.pop('abstract', None) + if abstract or not attrs.get('autoregister', True): + return new(cls, name, bases, attrs) + + # The 'app' attribute is now a property, with the real app located + # in the '_app' attribute. Previously this was a regular attribute, + # so we should support classes defining it. + app = attrs.pop('_app', None) or attrs.pop('app', None) + + # Attempt to inherit app from one the bases + if not isinstance(app, Proxy) and app is None: + for base in bases: + if getattr(base, '_app', None): + app = base._app + break + else: + app = current_app._get_current_object() + attrs['_app'] = app + + # - Automatically generate missing/empty name. + task_name = attrs.get('name') + if not task_name: + attrs['name'] = task_name = gen_task_name(app, name, task_module) + + if not attrs.get('_decorated'): + # non decorated tasks must also be shared in case + # an app is created multiple times due to modules + # imported under multiple names. + # Hairy stuff, here to be compatible with 2.x. + # People should not use non-abstract task classes anymore, + # use the task decorator. + from celery._state import connect_on_app_finalize + unique_name = '.'.join([task_module, name]) + if unique_name not in cls._creation_count: + # the creation count is used as a safety + # so that the same task is not added recursively + # to the set of constructors. + cls._creation_count[unique_name] = 1 + connect_on_app_finalize(_CompatShared( + unique_name, + lambda app: TaskType.__new__(cls, name, bases, + dict(attrs, _app=app)), + )) + + # - Create and register class. + # Because of the way import happens (recursively) + # we may or may not be the first time the task tries to register + # with the framework. There should only be one class for each task + # name, so we always return the registered version. + tasks = app._tasks + if task_name not in tasks: + tasks.register(new(cls, name, bases, attrs)) + instance = tasks[task_name] + instance.bind(app) + return instance.__class__ + + def __repr__(cls): + return _reprtask(cls) + + +@with_metaclass(TaskType) +class Task(object): + """Task base class. + + When called tasks apply the :meth:`run` method. This method must + be defined by all tasks (that is unless the :meth:`__call__` method + is overridden). + + """ + __trace__ = None + __v2_compat__ = False # set by old base in celery.task.base + + ErrorMail = ErrorMail + MaxRetriesExceededError = MaxRetriesExceededError + + #: Execution strategy used, or the qualified name of one. + Strategy = 'celery.worker.strategy:default' + + #: This is the instance bound to if the task is a method of a class. + __self__ = None + + #: The application instance associated with this task class. + _app = None + + #: Name of the task. + name = None + + #: If :const:`True` the task is an abstract base class. + abstract = True + + #: If disabled the worker will not forward magic keyword arguments. + #: Deprecated and scheduled for removal in v4.0. + accept_magic_kwargs = False + + #: Maximum number of retries before giving up. If set to :const:`None`, + #: it will **never** stop retrying. + max_retries = 3 + + #: Default time in seconds before a retry of the task should be + #: executed. 3 minutes by default. 
+ default_retry_delay = 3 * 60 + + #: Rate limit for this task type. Examples: :const:`None` (no rate + #: limit), `'100/s'` (hundred tasks a second), `'100/m'` (hundred tasks + #: a minute),`'100/h'` (hundred tasks an hour) + rate_limit = None + + #: If enabled the worker will not store task state and return values + #: for this task. Defaults to the :setting:`CELERY_IGNORE_RESULT` + #: setting. + ignore_result = None + + #: If enabled the request will keep track of subtasks started by + #: this task, and this information will be sent with the result + #: (``result.children``). + trail = True + + #: When enabled errors will be stored even if the task is otherwise + #: configured to ignore results. + store_errors_even_if_ignored = None + + #: If enabled an email will be sent to :setting:`ADMINS` whenever a task + #: of this type fails. + send_error_emails = None + + #: The name of a serializer that are registered with + #: :mod:`kombu.serialization.registry`. Default is `'pickle'`. + serializer = None + + #: Hard time limit. + #: Defaults to the :setting:`CELERYD_TASK_TIME_LIMIT` setting. + time_limit = None + + #: Soft time limit. + #: Defaults to the :setting:`CELERYD_TASK_SOFT_TIME_LIMIT` setting. + soft_time_limit = None + + #: The result store backend used for this task. + backend = None + + #: If disabled this task won't be registered automatically. + autoregister = True + + #: If enabled the task will report its status as 'started' when the task + #: is executed by a worker. Disabled by default as the normal behaviour + #: is to not report that level of granularity. Tasks are either pending, + #: finished, or waiting to be retried. + #: + #: Having a 'started' status can be useful for when there are long + #: running tasks and there is a need to report which task is currently + #: running. + #: + #: The application default can be overridden using the + #: :setting:`CELERY_TRACK_STARTED` setting. + track_started = None + + #: When enabled messages for this task will be acknowledged **after** + #: the task has been executed, and not *just before* which is the + #: default behavior. + #: + #: Please note that this means the task may be executed twice if the + #: worker crashes mid execution (which may be acceptable for some + #: applications). + #: + #: The application default can be overridden with the + #: :setting:`CELERY_ACKS_LATE` setting. + acks_late = None + + #: Tuple of expected exceptions. + #: + #: These are errors that are expected in normal operation + #: and that should not be regarded as a real error by the worker. + #: Currently this means that the state will be updated to an error + #: state, but the worker will not log the event as an error. + throws = () + + #: Default task expiry time. + expires = None + + #: Some may expect a request to exist even if the task has not been + #: called. This should probably be deprecated. + _default_request = None + + _exec_options = None + + __bound__ = False + + from_config = ( + ('send_error_emails', 'CELERY_SEND_TASK_ERROR_EMAILS'), + ('serializer', 'CELERY_TASK_SERIALIZER'), + ('rate_limit', 'CELERY_DEFAULT_RATE_LIMIT'), + ('track_started', 'CELERY_TRACK_STARTED'), + ('acks_late', 'CELERY_ACKS_LATE'), + ('ignore_result', 'CELERY_IGNORE_RESULT'), + ('store_errors_even_if_ignored', + 'CELERY_STORE_ERRORS_EVEN_IF_IGNORED'), + ) + + _backend = None # set by backend property. 
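The ``from_config`` table above is what gives task attributes their lazy defaults: anything left as :const:`None` on the class is filled in from the app configuration when the task is bound, while values set explicitly on the task win. A small sketch; ``proj`` and ``add`` are hypothetical:

.. code-block:: python

    from celery import Celery

    app = Celery('proj')
    app.conf.CELERY_ACKS_LATE = True

    @app.task(serializer='json')          # explicit value, not overridden
    def add(x, y):
        return x + y

    assert add.serializer == 'json'
    assert add.acks_late is True          # filled from CELERY_ACKS_LATE at bind time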
+ + __bound__ = False + + # - Tasks are lazily bound, so that configuration is not set + # - until the task is actually used + + @classmethod + def bind(self, app): + was_bound, self.__bound__ = self.__bound__, True + self._app = app + conf = app.conf + self._exec_options = None # clear option cache + + for attr_name, config_name in self.from_config: + if getattr(self, attr_name, None) is None: + setattr(self, attr_name, conf[config_name]) + if self.accept_magic_kwargs is None: + self.accept_magic_kwargs = app.accept_magic_kwargs + + # decorate with annotations from config. + if not was_bound: + self.annotate() + + from celery.utils.threads import LocalStack + self.request_stack = LocalStack() + + # PeriodicTask uses this to add itself to the PeriodicTask schedule. + self.on_bound(app) + + return app + + @classmethod + def on_bound(self, app): + """This method can be defined to do additional actions when the + task class is bound to an app.""" + pass + + @classmethod + def _get_app(self): + if self._app is None: + self._app = current_app + if not self.__bound__: + # The app property's __set__ method is not called + # if Task.app is set (on the class), so must bind on use. + self.bind(self._app) + return self._app + app = class_property(_get_app, bind) + + @classmethod + def annotate(self): + for d in resolve_all_annotations(self.app.annotations, self): + for key, value in items(d): + if key.startswith('@'): + self.add_around(key[1:], value) + else: + setattr(self, key, value) + + @classmethod + def add_around(self, attr, around): + orig = getattr(self, attr) + if getattr(orig, '__wrapped__', None): + orig = orig.__wrapped__ + meth = around(orig) + meth.__wrapped__ = orig + setattr(self, attr, meth) + + def __call__(self, *args, **kwargs): + _task_stack.push(self) + self.push_request() + try: + # add self if this is a bound task + if self.__self__ is not None: + return self.run(self.__self__, *args, **kwargs) + return self.run(*args, **kwargs) + finally: + self.pop_request() + _task_stack.pop() + + def __reduce__(self): + # - tasks are pickled into the name of the task only, and the reciever + # - simply grabs it from the local registry. + # - in later versions the module of the task is also included, + # - and the receiving side tries to import that module so that + # - it will work even if the task has not been registered. + mod = type(self).__module__ + mod = mod if mod and mod in sys.modules else None + return (_unpickle_task_v2, (self.name, mod), None) + + def run(self, *args, **kwargs): + """The body of the task executed by workers.""" + raise NotImplementedError('Tasks must define the run method.') + + def start_strategy(self, app, consumer, **kwargs): + return instantiate(self.Strategy, self, app, consumer, **kwargs) + + def delay(self, *args, **kwargs): + """Star argument version of :meth:`apply_async`. + + Does not support the extra options enabled by :meth:`apply_async`. + + :param \*args: positional arguments passed on to the task. + :param \*\*kwargs: keyword arguments passed on to the task. + + :returns :class:`celery.result.AsyncResult`: + + """ + return self.apply_async(args, kwargs) + + def apply_async(self, args=None, kwargs=None, task_id=None, producer=None, + link=None, link_error=None, **options): + """Apply tasks asynchronously by sending a message. + + :keyword args: The positional arguments to pass on to the + task (a :class:`list` or :class:`tuple`). 
+ + :keyword kwargs: The keyword arguments to pass on to the + task (a :class:`dict`) + + :keyword countdown: Number of seconds into the future that the + task should execute. Defaults to immediate + execution. + + :keyword eta: A :class:`~datetime.datetime` object describing + the absolute time and date of when the task should + be executed. May not be specified if `countdown` + is also supplied. + + :keyword expires: Either a :class:`int`, describing the number of + seconds, or a :class:`~datetime.datetime` object + that describes the absolute time and date of when + the task should expire. The task will not be + executed after the expiration time. + + :keyword connection: Re-use existing broker connection instead + of establishing a new one. + + :keyword retry: If enabled sending of the task message will be retried + in the event of connection loss or failure. Default + is taken from the :setting:`CELERY_TASK_PUBLISH_RETRY` + setting. Note you need to handle the + producer/connection manually for this to work. + + :keyword retry_policy: Override the retry policy used. See the + :setting:`CELERY_TASK_PUBLISH_RETRY` setting. + + :keyword routing_key: Custom routing key used to route the task to a + worker server. If in combination with a + ``queue`` argument only used to specify custom + routing keys to topic exchanges. + + :keyword queue: The queue to route the task to. This must be a key + present in :setting:`CELERY_QUEUES`, or + :setting:`CELERY_CREATE_MISSING_QUEUES` must be + enabled. See :ref:`guide-routing` for more + information. + + :keyword exchange: Named custom exchange to send the task to. + Usually not used in combination with the ``queue`` + argument. + + :keyword priority: The task priority, a number between 0 and 9. + Defaults to the :attr:`priority` attribute. + + :keyword serializer: A string identifying the default + serialization method to use. Can be `pickle`, + `json`, `yaml`, `msgpack` or any custom + serialization method that has been registered + with :mod:`kombu.serialization.registry`. + Defaults to the :attr:`serializer` attribute. + + :keyword compression: A string identifying the compression method + to use. Can be one of ``zlib``, ``bzip2``, + or any custom compression methods registered with + :func:`kombu.compression.register`. Defaults to + the :setting:`CELERY_MESSAGE_COMPRESSION` + setting. + :keyword link: A single, or a list of tasks to apply if the + task exits successfully. + :keyword link_error: A single, or a list of tasks to apply + if an error occurs while executing the task. + + :keyword producer: :class:~@amqp.TaskProducer` instance to use. + :keyword add_to_parent: If set to True (default) and the task + is applied while executing another task, then the result + will be appended to the parent tasks ``request.children`` + attribute. Trailing can also be disabled by default using the + :attr:`trail` attribute + :keyword publisher: Deprecated alias to ``producer``. + + Also supports all keyword arguments supported by + :meth:`kombu.Producer.publish`. + + .. note:: + If the :setting:`CELERY_ALWAYS_EAGER` setting is set, it will + be replaced by a local :func:`apply` call instead. + + """ + app = self._get_app() + if app.conf.CELERY_ALWAYS_EAGER: + return self.apply(args, kwargs, task_id=task_id or uuid(), + link=link, link_error=link_error, **options) + # add 'self' if this is a "task_method". 
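As a usage sketch of the calling options documented above (``add`` and ``log_result`` are assumed example tasks)::

    add.delay(2, 2)                          # shortcut for add.apply_async((2, 2))

    add.apply_async((2, 2), countdown=10)    # run roughly ten seconds from now
    add.apply_async((2, 2), queue='priority.high',
                    expires=120, link=log_result.s())

``link`` expects signatures rather than bare tasks, hence ``log_result.s()``.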
+ if self.__self__ is not None: + args = args if isinstance(args, tuple) else tuple(args or ()) + args = (self.__self__, ) + args + return app.send_task( + self.name, args, kwargs, task_id=task_id, producer=producer, + link=link, link_error=link_error, result_cls=self.AsyncResult, + **dict(self._get_exec_options(), **options) + ) + + def subtask_from_request(self, request=None, args=None, kwargs=None, + queue=None, **extra_options): + request = self.request if request is None else request + args = request.args if args is None else args + kwargs = request.kwargs if kwargs is None else kwargs + limit_hard, limit_soft = request.timelimit or (None, None) + options = { + 'task_id': request.id, + 'link': request.callbacks, + 'link_error': request.errbacks, + 'group_id': request.group, + 'chord': request.chord, + 'soft_time_limit': limit_soft, + 'time_limit': limit_hard, + 'reply_to': request.reply_to, + } + options.update( + {'queue': queue} if queue else (request.delivery_info or {}) + ) + return self.subtask(args, kwargs, options, type=self, **extra_options) + + def retry(self, args=None, kwargs=None, exc=None, throw=True, + eta=None, countdown=None, max_retries=None, **options): + """Retry the task. + + :param args: Positional arguments to retry with. + :param kwargs: Keyword arguments to retry with. + :keyword exc: Custom exception to report when the max restart + limit has been exceeded (default: + :exc:`~@MaxRetriesExceededError`). + + If this argument is set and retry is called while + an exception was raised (``sys.exc_info()`` is set) + it will attempt to reraise the current exception. + + If no exception was raised it will raise the ``exc`` + argument provided. + :keyword countdown: Time in seconds to delay the retry for. + :keyword eta: Explicit time and date to run the retry at + (must be a :class:`~datetime.datetime` instance). + :keyword max_retries: If set, overrides the default retry limit. + :keyword time_limit: If set, overrides the default time limit. + :keyword soft_time_limit: If set, overrides the default soft + time limit. + :keyword \*\*options: Any extra options to pass on to + meth:`apply_async`. + :keyword throw: If this is :const:`False`, do not raise the + :exc:`~@Retry` exception, + that tells the worker to mark the task as being + retried. Note that this means the task will be + marked as failed if the task raises an exception, + or successful if it returns. + + :raises celery.exceptions.Retry: To tell the worker that + the task has been re-sent for retry. This always happens, + unless the `throw` keyword argument has been explicitly set + to :const:`False`, and is considered normal operation. + + **Example** + + .. code-block:: python + + >>> from imaginary_twitter_lib import Twitter + >>> from proj.celery import app + + >>> @app.task() + ... def tweet(auth, message): + ... twitter = Twitter(oauth=auth) + ... try: + ... twitter.post_status_update(message) + ... except twitter.FailWhale as exc: + ... # Retry in 5 minutes. + ... raise tweet.retry(countdown=60 * 5, exc=exc) + + Although the task will never return above as `retry` raises an + exception to notify the worker, we use `raise` in front of the retry + to convey that the rest of the block will not be executed. + + """ + request = self.request + retries = request.retries + 1 + max_retries = self.max_retries if max_retries is None else max_retries + + # Not in worker or emulated by (apply/always_eager), + # so just raise the original exception. 
+ if request.called_directly: + maybe_reraise() # raise orig stack if PyErr_Occurred + raise exc or Retry('Task can be retried', None) + + if not eta and countdown is None: + countdown = self.default_retry_delay + + is_eager = request.is_eager + S = self.subtask_from_request( + request, args, kwargs, + countdown=countdown, eta=eta, retries=retries, + **options + ) + + if max_retries is not None and retries > max_retries: + if exc: + # first try to reraise the original exception + maybe_reraise() + # or if not in an except block then raise the custom exc. + raise exc() + raise self.MaxRetriesExceededError( + "Can't retry {0}[{1}] args:{2} kwargs:{3}".format( + self.name, request.id, S.args, S.kwargs)) + + # If task was executed eagerly using apply(), + # then the retry must also be executed eagerly. + try: + S.apply().get() if is_eager else S.apply_async() + except Exception as exc: + if is_eager: + raise + raise Reject(exc, requeue=False) + ret = Retry(exc=exc, when=eta or countdown) + if throw: + raise ret + return ret + + def apply(self, args=None, kwargs=None, + link=None, link_error=None, **options): + """Execute this task locally, by blocking until the task returns. + + :param args: positional arguments passed on to the task. + :param kwargs: keyword arguments passed on to the task. + :keyword throw: Re-raise task exceptions. Defaults to + the :setting:`CELERY_EAGER_PROPAGATES_EXCEPTIONS` + setting. + + :rtype :class:`celery.result.EagerResult`: + + """ + # trace imports Task, so need to import inline. + from celery.app.trace import eager_trace_task + + app = self._get_app() + args = args or () + # add 'self' if this is a bound method. + if self.__self__ is not None: + args = (self.__self__, ) + tuple(args) + kwargs = kwargs or {} + task_id = options.get('task_id') or uuid() + retries = options.get('retries', 0) + throw = app.either('CELERY_EAGER_PROPAGATES_EXCEPTIONS', + options.pop('throw', None)) + + # Make sure we get the task instance, not class. + task = app._tasks[self.name] + + request = {'id': task_id, + 'retries': retries, + 'is_eager': True, + 'logfile': options.get('logfile'), + 'loglevel': options.get('loglevel', 0), + 'callbacks': maybe_list(link), + 'errbacks': maybe_list(link_error), + 'headers': options.get('headers'), + 'delivery_info': {'is_eager': True}} + if self.accept_magic_kwargs: + default_kwargs = {'task_name': task.name, + 'task_id': task_id, + 'task_retries': retries, + 'task_is_eager': True, + 'logfile': options.get('logfile'), + 'loglevel': options.get('loglevel', 0), + 'delivery_info': {'is_eager': True}} + supported_keys = fun_takes_kwargs(task.run, default_kwargs) + extend_with = dict((key, val) + for key, val in items(default_kwargs) + if key in supported_keys) + kwargs.update(extend_with) + + tb = None + retval, info = eager_trace_task(task, task_id, args, kwargs, + app=self._get_app(), + request=request, propagate=throw) + if isinstance(retval, ExceptionInfo): + retval, tb = retval.exception, retval.traceback + state = states.SUCCESS if info is None else info.state + return EagerResult(task_id, retval, state, traceback=tb) + + def AsyncResult(self, task_id, **kwargs): + """Get AsyncResult instance for this kind of task. + + :param task_id: Task id to get result for. 
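A short sketch of eager execution through ``apply`` as implemented above (``add`` is an assumed example task)::

    result = add.apply((2, 2))     # runs inline and returns an EagerResult
    result.state                   # 'SUCCESS'
    result.get()                   # 4

``apply_async`` falls back to this path when :setting:`CELERY_ALWAYS_EAGER` is enabled.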
+ + """ + return self._get_app().AsyncResult(task_id, backend=self.backend, + task_name=self.name, **kwargs) + + def subtask(self, args=None, *starargs, **starkwargs): + """Return :class:`~celery.signature` object for + this task, wrapping arguments and execution options + for a single task invocation.""" + starkwargs.setdefault('app', self.app) + return signature(self, args, *starargs, **starkwargs) + + def s(self, *args, **kwargs): + """``.s(*a, **k) -> .subtask(a, k)``""" + return self.subtask(args, kwargs) + + def si(self, *args, **kwargs): + """``.si(*a, **k) -> .subtask(a, k, immutable=True)``""" + return self.subtask(args, kwargs, immutable=True) + + def chunks(self, it, n): + """Creates a :class:`~celery.canvas.chunks` task for this task.""" + from celery import chunks + return chunks(self.s(), it, n, app=self.app) + + def map(self, it): + """Creates a :class:`~celery.canvas.xmap` task from ``it``.""" + from celery import xmap + return xmap(self.s(), it, app=self.app) + + def starmap(self, it): + """Creates a :class:`~celery.canvas.xstarmap` task from ``it``.""" + from celery import xstarmap + return xstarmap(self.s(), it, app=self.app) + + def send_event(self, type_, **fields): + req = self.request + with self.app.events.default_dispatcher(hostname=req.hostname) as d: + return d.send(type_, uuid=req.id, **fields) + + def update_state(self, task_id=None, state=None, meta=None): + """Update task state. + + :keyword task_id: Id of the task to update, defaults to the + id of the current task + :keyword state: New state (:class:`str`). + :keyword meta: State metadata (:class:`dict`). + + + + """ + if task_id is None: + task_id = self.request.id + self.backend.store_result(task_id, meta, state) + + def on_success(self, retval, task_id, args, kwargs): + """Success handler. + + Run by the worker if the task executes successfully. + + :param retval: The return value of the task. + :param task_id: Unique id of the executed task. + :param args: Original arguments for the executed task. + :param kwargs: Original keyword arguments for the executed task. + + The return value of this handler is ignored. + + """ + pass + + def on_retry(self, exc, task_id, args, kwargs, einfo): + """Retry handler. + + This is run by the worker when the task is to be retried. + + :param exc: The exception sent to :meth:`retry`. + :param task_id: Unique id of the retried task. + :param args: Original arguments for the retried task. + :param kwargs: Original keyword arguments for the retried task. + + :keyword einfo: :class:`~billiard.einfo.ExceptionInfo` + instance, containing the traceback. + + The return value of this handler is ignored. + + """ + pass + + def on_failure(self, exc, task_id, args, kwargs, einfo): + """Error handler. + + This is run by the worker when the task fails. + + :param exc: The exception raised by the task. + :param task_id: Unique id of the failed task. + :param args: Original arguments for the task that failed. + :param kwargs: Original keyword arguments for the task + that failed. + + :keyword einfo: :class:`~billiard.einfo.ExceptionInfo` + instance, containing the traceback. + + The return value of this handler is ignored. + + """ + pass + + def after_return(self, status, retval, task_id, args, kwargs, einfo): + """Handler called after the task returns. + + :param status: Current task state. + :param retval: Task return value/exception. + :param task_id: Unique id of the task. + :param args: Original arguments for the task that failed. 
+ :param kwargs: Original keyword arguments for the task + that failed. + + :keyword einfo: :class:`~billiard.einfo.ExceptionInfo` + instance, containing the traceback (if any). + + The return value of this handler is ignored. + + """ + pass + + def send_error_email(self, context, exc, **kwargs): + if self.send_error_emails and \ + not getattr(self, 'disable_error_emails', None): + self.ErrorMail(self, **kwargs).send(context, exc) + + def add_trail(self, result): + if self.trail: + self.request.children.append(result) + return result + + def push_request(self, *args, **kwargs): + self.request_stack.push(Context(*args, **kwargs)) + + def pop_request(self): + self.request_stack.pop() + + def __repr__(self): + """`repr(task)`""" + return _reprtask(self, R_SELF_TASK if self.__self__ else R_INSTANCE) + + def _get_request(self): + """Get current request object.""" + req = self.request_stack.top + if req is None: + # task was not called, but some may still expect a request + # to be there, perhaps that should be deprecated. + if self._default_request is None: + self._default_request = Context() + return self._default_request + return req + request = property(_get_request) + + def _get_exec_options(self): + if self._exec_options is None: + self._exec_options = extract_exec_options(self) + return self._exec_options + + @property + def backend(self): + backend = self._backend + if backend is None: + return self.app.backend + return backend + + @backend.setter + def backend(self, value): # noqa + self._backend = value + + @property + def __name__(self): + return self.__class__.__name__ +BaseTask = Task # compat alias diff --git a/celery/app/trace.py b/celery/app/trace.py new file mode 100644 index 0000000..45e24c1 --- /dev/null +++ b/celery/app/trace.py @@ -0,0 +1,440 @@ +# -*- coding: utf-8 -*- +""" + celery.app.trace + ~~~~~~~~~~~~~~~~ + + This module defines how the task execution is traced: + errors are recorded, handlers are applied and so on. + +""" +from __future__ import absolute_import + +# ## --- +# This is the heart of the worker, the inner loop so to speak. +# It used to be split up into nice little classes and methods, +# but in the end it only resulted in bad performance and horrible tracebacks, +# so instead we now use one closure per task class. 
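To make the closure-per-task idea above concrete, a hedged sketch of building and invoking one of these tracers eagerly, loosely mirroring what ``Task.apply`` does further up (``add`` and the id string are assumptions)::

    from celery.app.trace import eager_trace_task

    retval, info = eager_trace_task(
        add, 'some-task-id', (2, 2), {}, app=add.app)
    # retval is the return value (or an ExceptionInfo); info is None on
    # success, otherwise it carries the resulting error state.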
+
+import os
+import socket
+import sys
+
+from warnings import warn
+
+from billiard.einfo import ExceptionInfo
+from kombu.exceptions import EncodeError
+from kombu.utils import kwdict
+
+from celery import current_app, group
+from celery import states, signals
+from celery._state import _task_stack
+from celery.app import set_default_app
+from celery.app.task import Task as BaseTask, Context
+from celery.exceptions import Ignore, Reject, Retry
+from celery.utils.log import get_logger
+from celery.utils.objects import mro_lookup
+from celery.utils.serialization import (
+    get_pickleable_exception,
+    get_pickleable_etype,
+)
+
+__all__ = ['TraceInfo', 'build_tracer', 'trace_task', 'eager_trace_task',
+           'setup_worker_optimizations', 'reset_worker_optimizations']
+
+_logger = get_logger(__name__)
+
+send_prerun = signals.task_prerun.send
+send_postrun = signals.task_postrun.send
+send_success = signals.task_success.send
+STARTED = states.STARTED
+SUCCESS = states.SUCCESS
+IGNORED = states.IGNORED
+REJECTED = states.REJECTED
+RETRY = states.RETRY
+FAILURE = states.FAILURE
+EXCEPTION_STATES = states.EXCEPTION_STATES
+IGNORE_STATES = frozenset([IGNORED, RETRY, REJECTED])
+
+#: set by :func:`setup_worker_optimizations`
+_tasks = None
+_patched = {}
+
+
+def task_has_custom(task, attr):
+    """Return true if the task or one of its bases
+    defines ``attr`` (excluding the one in BaseTask)."""
+    return mro_lookup(task.__class__, attr, stop=(BaseTask, object),
+                      monkey_patched=['celery.app.task'])
+
+
+class TraceInfo(object):
+    __slots__ = ('state', 'retval')
+
+    def __init__(self, state, retval=None):
+        self.state = state
+        self.retval = retval
+
+    def handle_error_state(self, task, eager=False):
+        store_errors = not eager
+        if task.ignore_result:
+            store_errors = task.store_errors_even_if_ignored
+
+        return {
+            RETRY: self.handle_retry,
+            FAILURE: self.handle_failure,
+        }[self.state](task, store_errors=store_errors)
+
+    def handle_retry(self, task, store_errors=True):
+        """Handle retry exception."""
+        # the exception raised is the Retry semi-predicate,
+        # and its 'exc' attribute is the original exception raised (if any).
+ req = task.request + type_, _, tb = sys.exc_info() + try: + reason = self.retval + einfo = ExceptionInfo((type_, reason, tb)) + if store_errors: + task.backend.mark_as_retry( + req.id, reason.exc, einfo.traceback, request=req, + ) + task.on_retry(reason.exc, req.id, req.args, req.kwargs, einfo) + signals.task_retry.send(sender=task, request=req, + reason=reason, einfo=einfo) + return einfo + finally: + del(tb) + + def handle_failure(self, task, store_errors=True): + """Handle exception.""" + req = task.request + type_, _, tb = sys.exc_info() + try: + exc = self.retval + einfo = ExceptionInfo() + einfo.exception = get_pickleable_exception(einfo.exception) + einfo.type = get_pickleable_etype(einfo.type) + if store_errors: + task.backend.mark_as_failure( + req.id, exc, einfo.traceback, request=req, + ) + task.on_failure(exc, req.id, req.args, req.kwargs, einfo) + signals.task_failure.send(sender=task, task_id=req.id, + exception=exc, args=req.args, + kwargs=req.kwargs, + traceback=tb, + einfo=einfo) + return einfo + finally: + del(tb) + + +def build_tracer(name, task, loader=None, hostname=None, store_errors=True, + Info=TraceInfo, eager=False, propagate=False, app=None, + IGNORE_STATES=IGNORE_STATES): + """Return a function that traces task execution; catches all + exceptions and updates result backend with the state and result + + If the call was successful, it saves the result to the task result + backend, and sets the task status to `"SUCCESS"`. + + If the call raises :exc:`~@Retry`, it extracts + the original exception, uses that as the result and sets the task state + to `"RETRY"`. + + If the call results in an exception, it saves the exception as the task + result, and sets the task state to `"FAILURE"`. + + Return a function that takes the following arguments: + + :param uuid: The id of the task. + :param args: List of positional args to pass on to the function. + :param kwargs: Keyword arguments mapping to pass on to the function. + :keyword request: Request dict. + + """ + # If the task doesn't define a custom __call__ method + # we optimize it away by simply calling the run method directly, + # saving the extra method call and a line less in the stack trace. 
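Roughly, the distinction the comment above draws looks like this (``app`` and both example tasks are assumptions)::

    @app.task
    def plain(x):
        return x            # no custom __call__: the tracer calls plain.run directly

    class Stamped(app.Task):
        abstract = True

        def __call__(self, *args, **kwargs):
            # a custom __call__ is detected via task_has_custom(), so the
            # tracer calls the task instance itself rather than .run
            return super(Stamped, self).__call__(*args, **kwargs)

    @app.task(base=Stamped)
    def stamped(x):
        return x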
+ fun = task if task_has_custom(task, '__call__') else task.run + + loader = loader or app.loader + backend = task.backend + ignore_result = task.ignore_result + track_started = task.track_started + track_started = not eager and (task.track_started and not ignore_result) + publish_result = not eager and not ignore_result + hostname = hostname or socket.gethostname() + + loader_task_init = loader.on_task_init + loader_cleanup = loader.on_process_cleanup + + task_on_success = None + task_after_return = None + if task_has_custom(task, 'on_success'): + task_on_success = task.on_success + if task_has_custom(task, 'after_return'): + task_after_return = task.after_return + + store_result = backend.store_result + backend_cleanup = backend.process_cleanup + + pid = os.getpid() + + request_stack = task.request_stack + push_request = request_stack.push + pop_request = request_stack.pop + push_task = _task_stack.push + pop_task = _task_stack.pop + on_chord_part_return = backend.on_chord_part_return + + prerun_receivers = signals.task_prerun.receivers + postrun_receivers = signals.task_postrun.receivers + success_receivers = signals.task_success.receivers + + from celery import canvas + signature = canvas.maybe_signature # maybe_ does not clone if already + + def on_error(request, exc, uuid, state=FAILURE, call_errbacks=True): + if propagate: + raise + I = Info(state, exc) + R = I.handle_error_state(task, eager=eager) + if call_errbacks: + group( + [signature(errback, app=app) + for errback in request.errbacks or []], app=app, + ).apply_async((uuid, )) + return I, R, I.state, I.retval + + def trace_task(uuid, args, kwargs, request=None): + # R - is the possibly prepared return value. + # I - is the Info object. + # retval - is the always unmodified return value. + # state - is the resulting task state. + + # This function is very long because we have unrolled all the calls + # for performance reasons, and because the function is so long + # we want the main variables (I, and R) to stand out visually from the + # the rest of the variables, so breaking PEP8 is worth it ;) + R = I = retval = state = None + kwargs = kwdict(kwargs) + try: + push_task(task) + task_request = Context(request or {}, args=args, + called_directly=False, kwargs=kwargs) + push_request(task_request) + try: + # -*- PRE -*- + if prerun_receivers: + send_prerun(sender=task, task_id=uuid, task=task, + args=args, kwargs=kwargs) + loader_task_init(uuid, task) + if track_started: + store_result( + uuid, {'pid': pid, 'hostname': hostname}, STARTED, + request=task_request, + ) + + # -*- TRACE -*- + try: + R = retval = fun(*args, **kwargs) + state = SUCCESS + except Reject as exc: + I, R = Info(REJECTED, exc), ExceptionInfo(internal=True) + state, retval = I.state, I.retval + except Ignore as exc: + I, R = Info(IGNORED, exc), ExceptionInfo(internal=True) + state, retval = I.state, I.retval + except Retry as exc: + I, R, state, retval = on_error( + task_request, exc, uuid, RETRY, call_errbacks=False, + ) + except Exception as exc: + I, R, state, retval = on_error(task_request, exc, uuid) + except BaseException as exc: + raise + else: + try: + # callback tasks must be applied before the result is + # stored, so that result.children is populated. 
+ + # groups are called inline and will store trail + # separately, so need to call them separately + # so that the trail's not added multiple times :( + # (Issue #1936) + callbacks = task.request.callbacks + if callbacks: + if len(task.request.callbacks) > 1: + sigs, groups = [], [] + for sig in callbacks: + sig = signature(sig, app=app) + if isinstance(sig, group): + groups.append(sig) + else: + sigs.append(sig) + for group_ in groups: + group.apply_async((retval, )) + if sigs: + group(sigs).apply_async(retval, ) + else: + signature(callbacks[0], app=app).delay(retval) + if publish_result: + store_result( + uuid, retval, SUCCESS, request=task_request, + ) + except EncodeError as exc: + I, R, state, retval = on_error(task_request, exc, uuid) + else: + if task_on_success: + task_on_success(retval, uuid, args, kwargs) + if success_receivers: + send_success(sender=task, result=retval) + + # -* POST *- + if state not in IGNORE_STATES: + if task_request.chord: + on_chord_part_return(task, state, R) + if task_after_return: + task_after_return( + state, retval, uuid, args, kwargs, None, + ) + finally: + try: + if postrun_receivers: + send_postrun(sender=task, task_id=uuid, task=task, + args=args, kwargs=kwargs, + retval=retval, state=state) + finally: + pop_task() + pop_request() + if not eager: + try: + backend_cleanup() + loader_cleanup() + except (KeyboardInterrupt, SystemExit, MemoryError): + raise + except Exception as exc: + _logger.error('Process cleanup failed: %r', exc, + exc_info=True) + except MemoryError: + raise + except Exception as exc: + if eager: + raise + R = report_internal_error(task, exc) + return R, I + + return trace_task + + +def trace_task(task, uuid, args, kwargs, request={}, **opts): + try: + if task.__trace__ is None: + task.__trace__ = build_tracer(task.name, task, **opts) + return task.__trace__(uuid, args, kwargs, request)[0] + except Exception as exc: + return report_internal_error(task, exc) + + +def _trace_task_ret(name, uuid, args, kwargs, request={}, app=None, **opts): + return trace_task((app or current_app).tasks[name], + uuid, args, kwargs, request, app=app, **opts) +trace_task_ret = _trace_task_ret + + +def _fast_trace_task(task, uuid, args, kwargs, request={}): + # setup_worker_optimizations will point trace_task_ret to here, + # so this is the function used in the worker. + return _tasks[task].__trace__(uuid, args, kwargs, request)[0] + + +def eager_trace_task(task, uuid, args, kwargs, request=None, **opts): + opts.setdefault('eager', True) + return build_tracer(task.name, task, **opts)( + uuid, args, kwargs, request) + + +def report_internal_error(task, exc): + _type, _value, _tb = sys.exc_info() + try: + _value = task.backend.prepare_exception(exc, 'pickle') + exc_info = ExceptionInfo((_type, _value, _tb), internal=True) + warn(RuntimeWarning( + 'Exception raised outside body: {0!r}:\n{1}'.format( + exc, exc_info.traceback))) + return exc_info + finally: + del(_tb) + + +def setup_worker_optimizations(app): + global _tasks + global trace_task_ret + + # make sure custom Task.__call__ methods that calls super + # will not mess up the request/task stack. + _install_stack_protection() + + # all new threads start without a current app, so if an app is not + # passed on to the thread it will fall back to the "default app", + # which then could be the wrong app. So for the worker + # we set this to always return our app. This is a hack, + # and means that only a single app can be used for workers + # running in the same process. 
+ app.set_current() + set_default_app(app) + + # evaluate all task classes by finalizing the app. + app.finalize() + + # set fast shortcut to task registry + _tasks = app._tasks + + trace_task_ret = _fast_trace_task + from celery.worker import job as job_module + job_module.trace_task_ret = _fast_trace_task + job_module.__optimize__() + + +def reset_worker_optimizations(): + global trace_task_ret + trace_task_ret = _trace_task_ret + try: + delattr(BaseTask, '_stackprotected') + except AttributeError: + pass + try: + BaseTask.__call__ = _patched.pop('BaseTask.__call__') + except KeyError: + pass + from celery.worker import job as job_module + job_module.trace_task_ret = _trace_task_ret + + +def _install_stack_protection(): + # Patches BaseTask.__call__ in the worker to handle the edge case + # where people override it and also call super. + # + # - The worker optimizes away BaseTask.__call__ and instead + # calls task.run directly. + # - so with the addition of current_task and the request stack + # BaseTask.__call__ now pushes to those stacks so that + # they work when tasks are called directly. + # + # The worker only optimizes away __call__ in the case + # where it has not been overridden, so the request/task stack + # will blow if a custom task class defines __call__ and also + # calls super(). + if not getattr(BaseTask, '_stackprotected', False): + _patched['BaseTask.__call__'] = orig = BaseTask.__call__ + + def __protected_call__(self, *args, **kwargs): + stack = self.request_stack + req = stack.top + if req and not req._protected and \ + len(stack) == 1 and not req.called_directly: + req._protected = 1 + return self.run(*args, **kwargs) + return orig(self, *args, **kwargs) + BaseTask.__call__ = __protected_call__ + BaseTask._stackprotected = True diff --git a/celery/app/utils.py b/celery/app/utils.py new file mode 100644 index 0000000..86bf579 --- /dev/null +++ b/celery/app/utils.py @@ -0,0 +1,254 @@ +# -*- coding: utf-8 -*- +""" + celery.app.utils + ~~~~~~~~~~~~~~~~ + + App utilities: Compat settings, bugreport tool, pickling apps. + +""" +from __future__ import absolute_import + +import os +import platform as _platform +import re + +from collections import Mapping +from types import ModuleType + +from celery.datastructures import ConfigurationView +from celery.five import items, string_t, values +from celery.platforms import pyimplementation +from celery.utils.text import pretty +from celery.utils.imports import import_from_cwd, symbol_by_name, qualname + +from .defaults import find + +__all__ = ['Settings', 'appstr', 'bugreport', + 'filter_hidden_settings', 'find_app'] + +#: Format used to generate bugreport information. 
+BUGREPORT_INFO = """ +software -> celery:{celery_v} kombu:{kombu_v} py:{py_v} + billiard:{billiard_v} {driver_v} +platform -> system:{system} arch:{arch} imp:{py_i} +loader -> {loader} +settings -> transport:{transport} results:{results} + +{human_settings} +""" + +HIDDEN_SETTINGS = re.compile( + 'API|TOKEN|KEY|SECRET|PASS|PROFANITIES_LIST|SIGNATURE|DATABASE', + re.IGNORECASE, +) + + +def appstr(app): + """String used in __repr__ etc, to id app instances.""" + return '{0}:0x{1:x}'.format(app.main or '__main__', id(app)) + + +class Settings(ConfigurationView): + """Celery settings object.""" + + @property + def CELERY_RESULT_BACKEND(self): + return self.first('CELERY_RESULT_BACKEND', 'CELERY_BACKEND') + + @property + def BROKER_TRANSPORT(self): + return self.first('BROKER_TRANSPORT', + 'BROKER_BACKEND', 'CARROT_BACKEND') + + @property + def BROKER_BACKEND(self): + """Deprecated compat alias to :attr:`BROKER_TRANSPORT`.""" + return self.BROKER_TRANSPORT + + @property + def BROKER_URL(self): + return (os.environ.get('CELERY_BROKER_URL') or + self.first('BROKER_URL', 'BROKER_HOST')) + + @property + def CELERY_TIMEZONE(self): + # this way we also support django's time zone. + return self.first('CELERY_TIMEZONE', 'TIME_ZONE') + + def without_defaults(self): + """Return the current configuration, but without defaults.""" + # the last stash is the default settings, so just skip that + return Settings({}, self._order[:-1]) + + def value_set_for(self, key): + return key in self.without_defaults() + + def find_option(self, name, namespace='celery'): + """Search for option by name. + + Will return ``(namespace, key, type)`` tuple, e.g.:: + + >>> from proj.celery import app + >>> app.conf.find_option('disable_rate_limits') + ('CELERY', 'DISABLE_RATE_LIMITS', + bool default->False>)) + + :param name: Name of option, cannot be partial. + :keyword namespace: Preferred namespace (``CELERY`` by default). + + """ + return find(name, namespace) + + def find_value_for_key(self, name, namespace='celery'): + """Shortcut to ``get_by_parts(*find_option(name)[:-1])``""" + return self.get_by_parts(*self.find_option(name, namespace)[:-1]) + + def get_by_parts(self, *parts): + """Return the current value for setting specified as a path. 
+ + Example:: + + >>> from proj.celery import app + >>> app.conf.get_by_parts('CELERY', 'DISABLE_RATE_LIMITS') + False + + """ + return self['_'.join(part for part in parts if part)] + + def table(self, with_defaults=False, censored=True): + filt = filter_hidden_settings if censored else lambda v: v + return filt(dict( + (k, v) for k, v in items( + self if with_defaults else self.without_defaults()) + if k.isupper() and not k.startswith('_') + )) + + def humanize(self, with_defaults=False, censored=True): + """Return a human readable string showing changes to the + configuration.""" + return '\n'.join( + '{0}: {1}'.format(key, pretty(value, width=50)) + for key, value in items(self.table(with_defaults, censored))) + + +class AppPickler(object): + """Old application pickler/unpickler (< 3.1).""" + + def __call__(self, cls, *args): + kwargs = self.build_kwargs(*args) + app = self.construct(cls, **kwargs) + self.prepare(app, **kwargs) + return app + + def prepare(self, app, **kwargs): + app.conf.update(kwargs['changes']) + + def build_kwargs(self, *args): + return self.build_standard_kwargs(*args) + + def build_standard_kwargs(self, main, changes, loader, backend, amqp, + events, log, control, accept_magic_kwargs, + config_source=None): + return dict(main=main, loader=loader, backend=backend, amqp=amqp, + changes=changes, events=events, log=log, control=control, + set_as_current=False, + accept_magic_kwargs=accept_magic_kwargs, + config_source=config_source) + + def construct(self, cls, **kwargs): + return cls(**kwargs) + + +def _unpickle_app(cls, pickler, *args): + """Rebuild app for versions 2.5+""" + return pickler()(cls, *args) + + +def _unpickle_app_v2(cls, kwargs): + """Rebuild app for versions 3.1+""" + kwargs['set_as_current'] = False + return cls(**kwargs) + + +def filter_hidden_settings(conf): + + def maybe_censor(key, value, mask='*' * 8): + if isinstance(value, Mapping): + return filter_hidden_settings(value) + if isinstance(key, string_t): + if HIDDEN_SETTINGS.search(key): + return mask + if 'BROKER_URL' in key.upper(): + from kombu import Connection + return Connection(value).as_uri(mask=mask) + return value + + return dict((k, maybe_censor(k, v)) for k, v in items(conf)) + + +def bugreport(app): + """Return a string containing information useful in bug reports.""" + import billiard + import celery + import kombu + + try: + conn = app.connection() + driver_v = '{0}:{1}'.format(conn.transport.driver_name, + conn.transport.driver_version()) + transport = conn.transport_cls + except Exception: + transport = driver_v = '' + + return BUGREPORT_INFO.format( + system=_platform.system(), + arch=', '.join(x for x in _platform.architecture() if x), + py_i=pyimplementation(), + celery_v=celery.VERSION_BANNER, + kombu_v=kombu.__version__, + billiard_v=billiard.__version__, + py_v=_platform.python_version(), + driver_v=driver_v, + transport=transport, + results=app.conf.CELERY_RESULT_BACKEND or 'disabled', + human_settings=app.conf.humanize(), + loader=qualname(app.loader.__class__), + ) + + +def find_app(app, symbol_by_name=symbol_by_name, imp=import_from_cwd): + from .base import Celery + + try: + sym = symbol_by_name(app, imp=imp) + except AttributeError: + # last part was not an attribute, but a module + sym = imp(app) + if isinstance(sym, ModuleType) and ':' not in app: + try: + found = sym.app + if isinstance(found, ModuleType): + raise AttributeError() + except AttributeError: + try: + found = sym.celery + if isinstance(found, ModuleType): + raise AttributeError() + except 
AttributeError: + if getattr(sym, '__path__', None): + try: + return find_app( + '{0}.celery'.format(app), + symbol_by_name=symbol_by_name, imp=imp, + ) + except ImportError: + pass + for suspect in values(vars(sym)): + if isinstance(suspect, Celery): + return suspect + raise + else: + return found + else: + return found + return sym diff --git a/celery/apps/__init__.py b/celery/apps/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/apps/beat.py b/celery/apps/beat.py new file mode 100644 index 0000000..46cef9b --- /dev/null +++ b/celery/apps/beat.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +""" + celery.apps.beat + ~~~~~~~~~~~~~~~~ + + This module is the 'program-version' of :mod:`celery.beat`. + + It does everything necessary to run that module + as an actual application, like installing signal handlers + and so on. + +""" +from __future__ import absolute_import, unicode_literals + +import numbers +import socket +import sys + +from celery import VERSION_BANNER, platforms, beat +from celery.utils.imports import qualname +from celery.utils.log import LOG_LEVELS, get_logger +from celery.utils.timeutils import humanize_seconds + +__all__ = ['Beat'] + +STARTUP_INFO_FMT = """ +Configuration -> + . broker -> {conninfo} + . loader -> {loader} + . scheduler -> {scheduler} +{scheduler_info} + . logfile -> {logfile}@%{loglevel} + . maxinterval -> {hmax_interval} ({max_interval}s) +""".strip() + +logger = get_logger('celery.beat') + + +class Beat(object): + Service = beat.Service + app = None + + def __init__(self, max_interval=None, app=None, + socket_timeout=30, pidfile=None, no_color=None, + loglevel=None, logfile=None, schedule=None, + scheduler_cls=None, redirect_stdouts=None, + redirect_stdouts_level=None, **kwargs): + """Starts the beat task scheduler.""" + self.app = app = app or self.app + self.loglevel = self._getopt('log_level', loglevel) + self.logfile = self._getopt('log_file', logfile) + self.schedule = self._getopt('schedule_filename', schedule) + self.scheduler_cls = self._getopt('scheduler', scheduler_cls) + self.redirect_stdouts = self._getopt( + 'redirect_stdouts', redirect_stdouts, + ) + self.redirect_stdouts_level = self._getopt( + 'redirect_stdouts_level', redirect_stdouts_level, + ) + + self.max_interval = max_interval + self.socket_timeout = socket_timeout + self.no_color = no_color + self.colored = app.log.colored( + self.logfile, + enabled=not no_color if no_color is not None else no_color, + ) + self.pidfile = pidfile + + if not isinstance(self.loglevel, numbers.Integral): + self.loglevel = LOG_LEVELS[self.loglevel.upper()] + + def _getopt(self, key, value): + if value is not None: + return value + return self.app.conf.find_value_for_key(key, namespace='celerybeat') + + def run(self): + print(str(self.colored.cyan( + 'celery beat v{0} is starting.'.format(VERSION_BANNER)))) + self.init_loader() + self.set_process_title() + self.start_scheduler() + + def setup_logging(self, colorize=None): + if colorize is None and self.no_color is not None: + colorize = not self.no_color + self.app.log.setup(self.loglevel, self.logfile, + self.redirect_stdouts, self.redirect_stdouts_level, + colorize=colorize) + + def start_scheduler(self): + c = self.colored + if self.pidfile: + platforms.create_pidlock(self.pidfile) + beat = self.Service(app=self.app, + max_interval=self.max_interval, + scheduler_cls=self.scheduler_cls, + schedule_filename=self.schedule) + + print(str(c.blue('__ ', c.magenta('-'), + c.blue(' ... 
__ '), c.magenta('-'), + c.blue(' _\n'), + c.reset(self.startup_info(beat))))) + self.setup_logging() + if self.socket_timeout: + logger.debug('Setting default socket timeout to %r', + self.socket_timeout) + socket.setdefaulttimeout(self.socket_timeout) + try: + self.install_sync_handler(beat) + beat.start() + except Exception as exc: + logger.critical('beat raised exception %s: %r', + exc.__class__, exc, + exc_info=True) + + def init_loader(self): + # Run the worker init handler. + # (Usually imports task modules and such.) + self.app.loader.init_worker() + self.app.finalize() + + def startup_info(self, beat): + scheduler = beat.get_scheduler(lazy=True) + return STARTUP_INFO_FMT.format( + conninfo=self.app.connection().as_uri(), + logfile=self.logfile or '[stderr]', + loglevel=LOG_LEVELS[self.loglevel], + loader=qualname(self.app.loader), + scheduler=qualname(scheduler), + scheduler_info=scheduler.info, + hmax_interval=humanize_seconds(beat.max_interval), + max_interval=beat.max_interval, + ) + + def set_process_title(self): + arg_start = 'manage' in sys.argv[0] and 2 or 1 + platforms.set_process_title( + 'celery beat', info=' '.join(sys.argv[arg_start:]), + ) + + def install_sync_handler(self, beat): + """Install a `SIGTERM` + `SIGINT` handler that saves + the beat schedule.""" + + def _sync(signum, frame): + beat.sync() + raise SystemExit() + + platforms.signals.update(SIGTERM=_sync, SIGINT=_sync) diff --git a/celery/apps/worker.py b/celery/apps/worker.py new file mode 100644 index 0000000..d190711 --- /dev/null +++ b/celery/apps/worker.py @@ -0,0 +1,371 @@ +# -*- coding: utf-8 -*- +""" + celery.apps.worker + ~~~~~~~~~~~~~~~~~~ + + This module is the 'program-version' of :mod:`celery.worker`. + + It does everything necessary to run that module + as an actual application, like installing signal handlers, + platform tweaks, and so on. + +""" +from __future__ import absolute_import, print_function, unicode_literals + +import logging +import os +import platform as _platform +import sys +import warnings + +from functools import partial + +from billiard import current_process +from kombu.utils.encoding import safe_str + +from celery import VERSION_BANNER, platforms, signals +from celery.app import trace +from celery.exceptions import ( + CDeprecationWarning, WorkerShutdown, WorkerTerminate, +) +from celery.five import string, string_t +from celery.loaders.app import AppLoader +from celery.platforms import check_privileges +from celery.utils import cry, isatty +from celery.utils.imports import qualname +from celery.utils.log import get_logger, in_sighandler, set_in_sighandler +from celery.utils.text import pluralize +from celery.worker import WorkController + +__all__ = ['Worker'] + +logger = get_logger(__name__) +is_jython = sys.platform.startswith('java') +is_pypy = hasattr(sys, 'pypy_version_info') + +W_PICKLE_DEPRECATED = """ +Starting from version 3.2 Celery will refuse to accept pickle by default. + +The pickle serializer is a security concern as it may give attackers +the ability to execute any command. It's important to secure +your broker from unauthorized access when using pickle, so we think +that enabling pickle should require a deliberate action and not be +the default choice. + +If you depend on pickle then you should set a setting to disable this +warning and to be sure that everything will continue working +when you upgrade to Celery 3.2:: + + CELERY_ACCEPT_CONTENT = ['pickle', 'json', 'msgpack', 'yaml'] + +You must only enable the serializers that you will actually use. 
+ +""" + + +def active_thread_count(): + from threading import enumerate + return sum(1 for t in enumerate() + if not t.name.startswith('Dummy-')) + + +def safe_say(msg): + print('\n{0}'.format(msg), file=sys.__stderr__) + +ARTLINES = [ + ' --------------', + '---- **** -----', + '--- * *** * --', + '-- * - **** ---', + '- ** ----------', + '- ** ----------', + '- ** ----------', + '- ** ----------', + '- *** --- * ---', + '-- ******* ----', + '--- ***** -----', + ' --------------', +] + +BANNER = """\ +{hostname} v{version} + +{platform} + +[config] +.> app: {app} +.> transport: {conninfo} +.> results: {results} +.> concurrency: {concurrency} + +[queues] +{queues} +""" + +EXTRA_INFO_FMT = """ +[tasks] +{tasks} +""" + + +class Worker(WorkController): + + def on_before_init(self, **kwargs): + trace.setup_worker_optimizations(self.app) + + # this signal can be used to set up configuration for + # workers by name. + signals.celeryd_init.send( + sender=self.hostname, instance=self, + conf=self.app.conf, options=kwargs, + ) + check_privileges(self.app.conf.CELERY_ACCEPT_CONTENT) + + def on_after_init(self, purge=False, no_color=None, + redirect_stdouts=None, redirect_stdouts_level=None, + **kwargs): + self.redirect_stdouts = self._getopt( + 'redirect_stdouts', redirect_stdouts, + ) + self.redirect_stdouts_level = self._getopt( + 'redirect_stdouts_level', redirect_stdouts_level, + ) + super(Worker, self).setup_defaults(**kwargs) + self.purge = purge + self.no_color = no_color + self._isatty = isatty(sys.stdout) + self.colored = self.app.log.colored( + self.logfile, + enabled=not no_color if no_color is not None else no_color + ) + + def on_init_blueprint(self): + self._custom_logging = self.setup_logging() + # apply task execution optimizations + # -- This will finalize the app! + trace.setup_worker_optimizations(self.app) + + def on_start(self): + if not self._custom_logging and self.redirect_stdouts: + self.app.log.redirect_stdouts(self.redirect_stdouts_level) + + WorkController.on_start(self) + + # this signal can be used to e.g. change queues after + # the -Q option has been applied. + signals.celeryd_after_setup.send( + sender=self.hostname, instance=self, conf=self.app.conf, + ) + + if not self.app.conf.value_set_for('CELERY_ACCEPT_CONTENT'): + warnings.warn(CDeprecationWarning(W_PICKLE_DEPRECATED)) + + if self.purge: + self.purge_messages() + + # Dump configuration to screen so we have some basic information + # for when users sends bug reports. + print(safe_str(''.join([ + string(self.colored.cyan(' \n', self.startup_info())), + string(self.colored.reset(self.extra_info() or '')), + ])), file=sys.__stdout__) + self.set_process_status('-active-') + self.install_platform_tweaks(self) + + def on_consumer_ready(self, consumer): + signals.worker_ready.send(sender=consumer) + print('{0} ready.'.format(safe_str(self.hostname), )) + + def setup_logging(self, colorize=None): + if colorize is None and self.no_color is not None: + colorize = not self.no_color + return self.app.log.setup( + self.loglevel, self.logfile, + redirect_stdouts=False, colorize=colorize, hostname=self.hostname, + ) + + def purge_messages(self): + count = self.app.control.purge() + if count: + print('purge: Erased {0} {1} from the queue.\n'.format( + count, pluralize(count, 'message'))) + + def tasklist(self, include_builtins=True, sep='\n', int_='celery.'): + return sep.join( + ' . 
{0}'.format(task) for task in sorted(self.app.tasks) + if (not task.startswith(int_) if not include_builtins else task) + ) + + def extra_info(self): + if self.loglevel <= logging.INFO: + include_builtins = self.loglevel <= logging.DEBUG + tasklist = self.tasklist(include_builtins=include_builtins) + return EXTRA_INFO_FMT.format(tasks=tasklist) + + def startup_info(self): + app = self.app + concurrency = string(self.concurrency) + appr = '{0}:0x{1:x}'.format(app.main or '__main__', id(app)) + if not isinstance(app.loader, AppLoader): + loader = qualname(app.loader) + if loader.startswith('celery.loaders'): + loader = loader[14:] + appr += ' ({0})'.format(loader) + if self.autoscale: + max, min = self.autoscale + concurrency = '{{min={0}, max={1}}}'.format(min, max) + pool = self.pool_cls + if not isinstance(pool, string_t): + pool = pool.__module__ + concurrency += ' ({0})'.format(pool.split('.')[-1]) + events = 'ON' + if not self.send_events: + events = 'OFF (enable -E to monitor this worker)' + + banner = BANNER.format( + app=appr, + hostname=safe_str(self.hostname), + version=VERSION_BANNER, + conninfo=self.app.connection().as_uri(), + results=self.app.conf.CELERY_RESULT_BACKEND or 'disabled', + concurrency=concurrency, + platform=safe_str(_platform.platform()), + events=events, + queues=app.amqp.queues.format(indent=0, indent_first=False), + ).splitlines() + + # integrate the ASCII art. + for i, x in enumerate(banner): + try: + banner[i] = ' '.join([ARTLINES[i], banner[i]]) + except IndexError: + banner[i] = ' ' * 16 + banner[i] + return '\n'.join(banner) + '\n' + + def install_platform_tweaks(self, worker): + """Install platform specific tweaks and workarounds.""" + if self.app.IS_OSX: + self.osx_proxy_detection_workaround() + + # Install signal handler so SIGHUP restarts the worker. + if not self._isatty: + # only install HUP handler if detached from terminal, + # so closing the terminal window doesn't restart the worker + # into the background. + if self.app.IS_OSX: + # OS X can't exec from a process using threads. 
+ # See http://github.com/celery/celery/issues#issue/152 + install_HUP_not_supported_handler(worker) + else: + install_worker_restart_handler(worker) + install_worker_term_handler(worker) + install_worker_term_hard_handler(worker) + install_worker_int_handler(worker) + install_cry_handler() + install_rdb_handler() + + def osx_proxy_detection_workaround(self): + """See http://github.com/celery/celery/issues#issue/161""" + os.environ.setdefault('celery_dummy_proxy', 'set_by_celeryd') + + def set_process_status(self, info): + return platforms.set_mp_process_title( + 'celeryd', + info='{0} ({1})'.format(info, platforms.strargv(sys.argv)), + hostname=self.hostname, + ) + + +def _shutdown_handler(worker, sig='TERM', how='Warm', + exc=WorkerShutdown, callback=None): + + def _handle_request(*args): + with in_sighandler(): + from celery.worker import state + if current_process()._name == 'MainProcess': + if callback: + callback(worker) + safe_say('worker: {0} shutdown (MainProcess)'.format(how)) + if active_thread_count() > 1: + setattr(state, {'Warm': 'should_stop', + 'Cold': 'should_terminate'}[how], True) + else: + raise exc() + _handle_request.__name__ = str('worker_{0}'.format(how)) + platforms.signals[sig] = _handle_request +install_worker_term_handler = partial( + _shutdown_handler, sig='SIGTERM', how='Warm', exc=WorkerShutdown, +) +if not is_jython: # pragma: no cover + install_worker_term_hard_handler = partial( + _shutdown_handler, sig='SIGQUIT', how='Cold', exc=WorkerTerminate, + ) +else: # pragma: no cover + install_worker_term_handler = \ + install_worker_term_hard_handler = lambda *a, **kw: None + + +def on_SIGINT(worker): + safe_say('worker: Hitting Ctrl+C again will terminate all running tasks!') + install_worker_term_hard_handler(worker, sig='SIGINT') +if not is_jython: # pragma: no cover + install_worker_int_handler = partial( + _shutdown_handler, sig='SIGINT', callback=on_SIGINT + ) +else: # pragma: no cover + install_worker_int_handler = lambda *a, **kw: None + + +def _reload_current_worker(): + platforms.close_open_fds([ + sys.__stdin__, sys.__stdout__, sys.__stderr__, + ]) + os.execv(sys.executable, [sys.executable] + sys.argv) + + +def install_worker_restart_handler(worker, sig='SIGHUP'): + + def restart_worker_sig_handler(*args): + """Signal handler restarting the current python program.""" + set_in_sighandler(True) + safe_say('Restarting celery worker ({0})'.format(' '.join(sys.argv))) + import atexit + atexit.register(_reload_current_worker) + from celery.worker import state + state.should_stop = True + platforms.signals[sig] = restart_worker_sig_handler + + +def install_cry_handler(sig='SIGUSR1'): + # Jython/PyPy does not have sys._current_frames + if is_jython or is_pypy: # pragma: no cover + return + + def cry_handler(*args): + """Signal handler logging the stacktrace of all active threads.""" + with in_sighandler(): + safe_say(cry()) + platforms.signals[sig] = cry_handler + + +def install_rdb_handler(envvar='CELERY_RDBSIG', + sig='SIGUSR2'): # pragma: no cover + + def rdb_handler(*args): + """Signal handler setting a rdb breakpoint at the current frame.""" + with in_sighandler(): + from celery.contrib.rdb import set_trace, _frame + # gevent does not pass standard signal handler args + frame = args[1] if args else _frame().f_back + set_trace(frame) + if os.environ.get(envvar): + platforms.signals[sig] = rdb_handler + + +def install_HUP_not_supported_handler(worker, sig='SIGHUP'): + + def warn_on_HUP_handler(signum, frame): + with in_sighandler(): + safe_say('{sig} not 
supported: Restarting with {sig} is ' + 'unstable on this platform!'.format(sig=sig)) + platforms.signals[sig] = warn_on_HUP_handler diff --git a/celery/backends/__init__.py b/celery/backends/__init__.py new file mode 100644 index 0000000..fbe8a9c --- /dev/null +++ b/celery/backends/__init__.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +""" + celery.backends + ~~~~~~~~~~~~~~~ + + Backend abstract factory (...did I just say that?) and alias definitions. + +""" +from __future__ import absolute_import + +import sys + +from kombu.utils.url import _parse_url + +from celery.local import Proxy +from celery._state import current_app +from celery.five import reraise +from celery.utils.imports import symbol_by_name + +__all__ = ['get_backend_cls', 'get_backend_by_url'] + +UNKNOWN_BACKEND = """\ +Unknown result backend: {0!r}. Did you spell that correctly? ({1!r})\ +""" + +BACKEND_ALIASES = { + 'amqp': 'celery.backends.amqp:AMQPBackend', + 'rpc': 'celery.backends.rpc.RPCBackend', + 'cache': 'celery.backends.cache:CacheBackend', + 'redis': 'celery.backends.redis:RedisBackend', + 'mongodb': 'celery.backends.mongodb:MongoBackend', + 'db': 'celery.backends.database:DatabaseBackend', + 'database': 'celery.backends.database:DatabaseBackend', + 'cassandra': 'celery.backends.cassandra:CassandraBackend', + 'couchbase': 'celery.backends.couchbase:CouchBaseBackend', + 'disabled': 'celery.backends.base:DisabledBackend', +} + +#: deprecated alias to ``current_app.backend``. +default_backend = Proxy(lambda: current_app.backend) + + +def get_backend_cls(backend=None, loader=None): + """Get backend class by name/alias""" + backend = backend or 'disabled' + loader = loader or current_app.loader + aliases = dict(BACKEND_ALIASES, **loader.override_backends) + try: + return symbol_by_name(backend, aliases) + except ValueError as exc: + reraise(ValueError, ValueError(UNKNOWN_BACKEND.format( + backend, exc)), sys.exc_info()[2]) + + +def get_backend_by_url(backend=None, loader=None): + url = None + if backend and '://' in backend: + url = backend + if '+' in url[:url.index('://')]: + backend, url = url.split('+', 1) + else: + backend, _, _, _, _, _, _ = _parse_url(url) + return get_backend_cls(backend, loader), url diff --git a/celery/backends/amqp.py b/celery/backends/amqp.py new file mode 100644 index 0000000..62cf203 --- /dev/null +++ b/celery/backends/amqp.py @@ -0,0 +1,317 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.amqp + ~~~~~~~~~~~~~~~~~~~~ + + The AMQP result backend. + + This backend publishes results as messages. + +""" +from __future__ import absolute_import + +import socket + +from collections import deque +from operator import itemgetter + +from kombu import Exchange, Queue, Producer, Consumer + +from celery import states +from celery.exceptions import TimeoutError +from celery.five import range, monotonic +from celery.utils.functional import dictfilter +from celery.utils.log import get_logger +from celery.utils.timeutils import maybe_s_to_ms + +from .base import BaseBackend + +__all__ = ['BacklogLimitExceeded', 'AMQPBackend'] + +logger = get_logger(__name__) + + +class BacklogLimitExceeded(Exception): + """Too much state history to fast-forward.""" + + +def repair_uuid(s): + # Historically the dashes in UUIDS are removed from AMQ entity names, + # but there is no known reason to. Hopefully we'll be able to fix + # this in v4.0. 
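As a quick illustration of the dash handling described in the comment above (the id is an arbitrary example); ``rkey`` below strips the dashes and this helper reinserts them::

    >>> repair_uuid('2dcf8d8a68b649a5a4bdbbeb734bcedb')
    '2dcf8d8a-68b6-49a5-a4bd-bbeb734bcedb'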
+ return '%s-%s-%s-%s-%s' % (s[:8], s[8:12], s[12:16], s[16:20], s[20:]) + + +class NoCacheQueue(Queue): + can_cache_declaration = False + + +class AMQPBackend(BaseBackend): + """Publishes results by sending messages.""" + Exchange = Exchange + Queue = NoCacheQueue + Consumer = Consumer + Producer = Producer + + BacklogLimitExceeded = BacklogLimitExceeded + + persistent = True + supports_autoexpire = True + supports_native_join = True + + retry_policy = { + 'max_retries': 20, + 'interval_start': 0, + 'interval_step': 1, + 'interval_max': 1, + } + + def __init__(self, app, connection=None, exchange=None, exchange_type=None, + persistent=None, serializer=None, auto_delete=True, **kwargs): + super(AMQPBackend, self).__init__(app, **kwargs) + conf = self.app.conf + self._connection = connection + self.persistent = self.prepare_persistent(persistent) + self.delivery_mode = 2 if self.persistent else 1 + exchange = exchange or conf.CELERY_RESULT_EXCHANGE + exchange_type = exchange_type or conf.CELERY_RESULT_EXCHANGE_TYPE + self.exchange = self._create_exchange( + exchange, exchange_type, self.delivery_mode, + ) + self.serializer = serializer or conf.CELERY_RESULT_SERIALIZER + self.auto_delete = auto_delete + + self.expires = None + if 'expires' not in kwargs or kwargs['expires'] is not None: + self.expires = self.prepare_expires(kwargs.get('expires')) + self.queue_arguments = dictfilter({ + 'x-expires': maybe_s_to_ms(self.expires), + }) + + def _create_exchange(self, name, type='direct', delivery_mode=2): + return self.Exchange(name=name, + type=type, + delivery_mode=delivery_mode, + durable=self.persistent, + auto_delete=False) + + def _create_binding(self, task_id): + name = self.rkey(task_id) + return self.Queue(name=name, + exchange=self.exchange, + routing_key=name, + durable=self.persistent, + auto_delete=self.auto_delete, + queue_arguments=self.queue_arguments) + + def revive(self, channel): + pass + + def rkey(self, task_id): + return task_id.replace('-', '') + + def destination_for(self, task_id, request): + if request: + return self.rkey(task_id), request.correlation_id or task_id + return self.rkey(task_id), task_id + + def store_result(self, task_id, result, status, + traceback=None, request=None, **kwargs): + """Send task return value and status.""" + routing_key, correlation_id = self.destination_for(task_id, request) + if not routing_key: + return + with self.app.amqp.producer_pool.acquire(block=True) as producer: + producer.publish( + {'task_id': task_id, 'status': status, + 'result': self.encode_result(result, status), + 'traceback': traceback, + 'children': self.current_task_children(request)}, + exchange=self.exchange, + routing_key=routing_key, + correlation_id=correlation_id, + serializer=self.serializer, + retry=True, retry_policy=self.retry_policy, + declare=self.on_reply_declare(task_id), + delivery_mode=self.delivery_mode, + ) + return result + + def on_reply_declare(self, task_id): + return [self._create_binding(task_id)] + + def wait_for(self, task_id, timeout=None, cache=True, propagate=True, + no_ack=True, on_interval=None, + READY_STATES=states.READY_STATES, + PROPAGATE_STATES=states.PROPAGATE_STATES, + **kwargs): + cached_meta = self._cache.get(task_id) + if cache and cached_meta and \ + cached_meta['status'] in READY_STATES: + meta = cached_meta + else: + try: + meta = self.consume(task_id, timeout=timeout, no_ack=no_ack, + on_interval=on_interval) + except socket.timeout: + raise TimeoutError('The operation timed out.') + + if meta['status'] in PROPAGATE_STATES 
and propagate: + raise self.exception_to_python(meta['result']) + # consume() always returns READY_STATE. + return meta['result'] + + def get_task_meta(self, task_id, backlog_limit=1000): + # Polling and using basic_get + with self.app.pool.acquire_channel(block=True) as (_, channel): + binding = self._create_binding(task_id)(channel) + binding.declare() + + prev = latest = acc = None + for i in range(backlog_limit): # spool ffwd + acc = binding.get( + accept=self.accept, no_ack=False, + ) + if not acc: # no more messages + break + if acc.payload['task_id'] == task_id: + prev, latest = latest, acc + if prev: + # backends are not expected to keep history, + # so we delete everything except the most recent state. + prev.ack() + prev = None + else: + raise self.BacklogLimitExceeded(task_id) + + if latest: + payload = self._cache[task_id] = latest.payload + latest.requeue() + return payload + else: + # no new state, use previous + try: + return self._cache[task_id] + except KeyError: + # result probably pending. + return {'status': states.PENDING, 'result': None} + poll = get_task_meta # XXX compat + + def drain_events(self, connection, consumer, + timeout=None, on_interval=None, now=monotonic, wait=None): + wait = wait or connection.drain_events + results = {} + + def callback(meta, message): + if meta['status'] in states.READY_STATES: + results[meta['task_id']] = meta + + consumer.callbacks[:] = [callback] + time_start = now() + + while 1: + # Total time spent may exceed a single call to wait() + if timeout and now() - time_start >= timeout: + raise socket.timeout() + wait(timeout=timeout) + if on_interval: + on_interval() + if results: # got event on the wanted channel. + break + self._cache.update(results) + return results + + def consume(self, task_id, timeout=None, no_ack=True, on_interval=None): + wait = self.drain_events + with self.app.pool.acquire_channel(block=True) as (conn, channel): + binding = self._create_binding(task_id) + with self.Consumer(channel, binding, + no_ack=no_ack, accept=self.accept) as consumer: + while 1: + try: + return wait( + conn, consumer, timeout, on_interval)[task_id] + except KeyError: + continue + + def _many_bindings(self, ids): + return [self._create_binding(task_id) for task_id in ids] + + def get_many(self, task_ids, timeout=None, no_ack=True, + now=monotonic, getfields=itemgetter('status', 'task_id'), + READY_STATES=states.READY_STATES, + PROPAGATE_STATES=states.PROPAGATE_STATES, **kwargs): + with self.app.pool.acquire_channel(block=True) as (conn, channel): + ids = set(task_ids) + cached_ids = set() + mark_cached = cached_ids.add + for task_id in ids: + try: + cached = self._cache[task_id] + except KeyError: + pass + else: + if cached['status'] in READY_STATES: + yield task_id, cached + mark_cached(task_id) + ids.difference_update(cached_ids) + results = deque() + push_result = results.append + push_cache = self._cache.__setitem__ + to_exception = self.exception_to_python + + def on_message(message): + body = message.decode() + state, uid = getfields(body) + if state in READY_STATES: + if state in PROPAGATE_STATES: + body['result'] = to_exception(body['result']) + push_result(body) \ + if uid in task_ids else push_cache(uid, body) + + bindings = self._many_bindings(task_ids) + with self.Consumer(channel, bindings, on_message=on_message, + accept=self.accept, no_ack=no_ack): + wait = conn.drain_events + popleft = results.popleft + while ids: + wait(timeout=timeout) + while results: + state = popleft() + task_id = state['task_id'] + 
ids.discard(task_id) + push_cache(task_id, state) + yield task_id, state + + def reload_task_result(self, task_id): + raise NotImplementedError( + 'reload_task_result is not supported by this backend.') + + def reload_group_result(self, task_id): + """Reload group result, even if it has been previously fetched.""" + raise NotImplementedError( + 'reload_group_result is not supported by this backend.') + + def save_group(self, group_id, result): + raise NotImplementedError( + 'save_group is not supported by this backend.') + + def restore_group(self, group_id, cache=True): + raise NotImplementedError( + 'restore_group is not supported by this backend.') + + def delete_group(self, group_id): + raise NotImplementedError( + 'delete_group is not supported by this backend.') + + def __reduce__(self, args=(), kwargs={}): + kwargs.update( + connection=self._connection, + exchange=self.exchange.name, + exchange_type=self.exchange.type, + persistent=self.persistent, + serializer=self.serializer, + auto_delete=self.auto_delete, + expires=self.expires, + ) + return super(AMQPBackend, self).__reduce__(args, kwargs) diff --git a/celery/backends/base.py b/celery/backends/base.py new file mode 100644 index 0000000..48bda24 --- /dev/null +++ b/celery/backends/base.py @@ -0,0 +1,596 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.base + ~~~~~~~~~~~~~~~~~~~~ + + Result backend base classes. + + - :class:`BaseBackend` defines the interface. + + - :class:`KeyValueStoreBackend` is a common base class + using K/V semantics like _get and _put. + +""" +from __future__ import absolute_import + +import time +import sys + +from datetime import timedelta + +from billiard.einfo import ExceptionInfo +from kombu.serialization import ( + dumps, loads, prepare_accept_content, + registry as serializer_registry, +) +from kombu.utils.encoding import bytes_to_str, ensure_bytes, from_utf8 + +from celery import states +from celery import current_app, maybe_signature +from celery.app import current_task +from celery.exceptions import ChordError, TimeoutError, TaskRevokedError +from celery.five import items +from celery.result import ( + GroupResult, ResultBase, allow_join_result, result_from_tuple, +) +from celery.utils import timeutils +from celery.utils.functional import LRUCache +from celery.utils.log import get_logger +from celery.utils.serialization import ( + get_pickled_exception, + get_pickleable_exception, + create_exception_cls, +) + +__all__ = ['BaseBackend', 'KeyValueStoreBackend', 'DisabledBackend'] + +EXCEPTION_ABLE_CODECS = frozenset(['pickle']) +PY3 = sys.version_info >= (3, 0) + +logger = get_logger(__name__) + + +def unpickle_backend(cls, args, kwargs): + """Return an unpickled backend.""" + return cls(*args, app=current_app._get_current_object(), **kwargs) + + +class _nulldict(dict): + + def ignore(self, *a, **kw): + pass + __setitem__ = update = setdefault = ignore + + +class BaseBackend(object): + READY_STATES = states.READY_STATES + UNREADY_STATES = states.UNREADY_STATES + EXCEPTION_STATES = states.EXCEPTION_STATES + + TimeoutError = TimeoutError + + #: Time to sleep between polling each individual item + #: in `ResultSet.iterate`. as opposed to the `interval` + #: argument which is for each pass. + subpolling_interval = None + + #: If true the backend must implement :meth:`get_many`. + supports_native_join = False + + #: If true the backend must automatically expire results. + #: The daily backend_cleanup periodic task will not be triggered + #: in this case. 
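+    #: (For example, the amqp, cache, redis and cassandra backends in this change set this to True.)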
+ supports_autoexpire = False + + #: Set to true if the backend is peristent by default. + persistent = True + + retry_policy = { + 'max_retries': 20, + 'interval_start': 0, + 'interval_step': 1, + 'interval_max': 1, + } + + def __init__(self, app, serializer=None, + max_cached_results=None, accept=None, **kwargs): + self.app = app + conf = self.app.conf + self.serializer = serializer or conf.CELERY_RESULT_SERIALIZER + (self.content_type, + self.content_encoding, + self.encoder) = serializer_registry._encoders[self.serializer] + cmax = max_cached_results or conf.CELERY_MAX_CACHED_RESULTS + self._cache = _nulldict() if cmax == -1 else LRUCache(limit=cmax) + self.accept = prepare_accept_content( + conf.CELERY_ACCEPT_CONTENT if accept is None else accept, + ) + + def mark_as_started(self, task_id, **meta): + """Mark a task as started""" + return self.store_result(task_id, meta, status=states.STARTED) + + def mark_as_done(self, task_id, result, request=None): + """Mark task as successfully executed.""" + return self.store_result(task_id, result, + status=states.SUCCESS, request=request) + + def mark_as_failure(self, task_id, exc, traceback=None, request=None): + """Mark task as executed with failure. Stores the execption.""" + return self.store_result(task_id, exc, status=states.FAILURE, + traceback=traceback, request=request) + + def chord_error_from_stack(self, callback, exc=None): + from celery import group + app = self.app + backend = app._tasks[callback.task].backend + try: + group( + [app.signature(errback) + for errback in callback.options.get('link_error') or []], + app=app, + ).apply_async((callback.id, )) + except Exception as eb_exc: + return backend.fail_from_current_stack(callback.id, exc=eb_exc) + else: + return backend.fail_from_current_stack(callback.id, exc=exc) + + def fail_from_current_stack(self, task_id, exc=None): + type_, real_exc, tb = sys.exc_info() + try: + exc = real_exc if exc is None else exc + ei = ExceptionInfo((type_, exc, tb)) + self.mark_as_failure(task_id, exc, ei.traceback) + return ei + finally: + del(tb) + + def mark_as_retry(self, task_id, exc, traceback=None, request=None): + """Mark task as being retries. 
Stores the current + exception (if any).""" + return self.store_result(task_id, exc, status=states.RETRY, + traceback=traceback, request=request) + + def mark_as_revoked(self, task_id, reason='', request=None): + return self.store_result(task_id, TaskRevokedError(reason), + status=states.REVOKED, traceback=None, + request=request) + + def prepare_exception(self, exc, serializer=None): + """Prepare exception for serialization.""" + serializer = self.serializer if serializer is None else serializer + if serializer in EXCEPTION_ABLE_CODECS: + return get_pickleable_exception(exc) + return {'exc_type': type(exc).__name__, 'exc_message': str(exc)} + + def exception_to_python(self, exc): + """Convert serialized exception to Python exception.""" + if self.serializer in EXCEPTION_ABLE_CODECS: + return get_pickled_exception(exc) + elif not isinstance(exc, BaseException): + return create_exception_cls( + from_utf8(exc['exc_type']), __name__)(exc['exc_message']) + return exc + + def prepare_value(self, result): + """Prepare value for storage.""" + if self.serializer != 'pickle' and isinstance(result, ResultBase): + return result.as_tuple() + return result + + def encode(self, data): + _, _, payload = dumps(data, serializer=self.serializer) + return payload + + def decode(self, payload): + payload = PY3 and payload or str(payload) + return loads(payload, + content_type=self.content_type, + content_encoding=self.content_encoding, + accept=self.accept) + + def wait_for(self, task_id, + timeout=None, propagate=True, interval=0.5, no_ack=True, + on_interval=None): + """Wait for task and return its result. + + If the task raises an exception, this exception + will be re-raised by :func:`wait_for`. + + If `timeout` is not :const:`None`, this raises the + :class:`celery.exceptions.TimeoutError` exception if the operation + takes longer than `timeout` seconds. + + """ + + time_elapsed = 0.0 + + while 1: + status = self.get_status(task_id) + if status == states.SUCCESS: + return self.get_result(task_id) + elif status in states.PROPAGATE_STATES: + result = self.get_result(task_id) + if propagate: + raise result + return result + if on_interval: + on_interval() + # avoid hammering the CPU checking status. 
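+            # sleep `interval` seconds between polls; give up with TimeoutError once `timeout` (when set) has elapsed.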
+ time.sleep(interval) + time_elapsed += interval + if timeout and time_elapsed >= timeout: + raise TimeoutError('The operation timed out.') + + def prepare_expires(self, value, type=None): + if value is None: + value = self.app.conf.CELERY_TASK_RESULT_EXPIRES + if isinstance(value, timedelta): + value = timeutils.timedelta_seconds(value) + if value is not None and type: + return type(value) + return value + + def prepare_persistent(self, enabled=None): + if enabled is not None: + return enabled + p = self.app.conf.CELERY_RESULT_PERSISTENT + return self.persistent if p is None else p + + def encode_result(self, result, status): + if status in self.EXCEPTION_STATES and isinstance(result, Exception): + return self.prepare_exception(result) + else: + return self.prepare_value(result) + + def is_cached(self, task_id): + return task_id in self._cache + + def store_result(self, task_id, result, status, + traceback=None, request=None, **kwargs): + """Update task state and result.""" + result = self.encode_result(result, status) + self._store_result(task_id, result, status, traceback, + request=request, **kwargs) + return result + + def forget(self, task_id): + self._cache.pop(task_id, None) + self._forget(task_id) + + def _forget(self, task_id): + raise NotImplementedError('backend does not implement forget.') + + def get_status(self, task_id): + """Get the status of a task.""" + return self.get_task_meta(task_id)['status'] + + def get_traceback(self, task_id): + """Get the traceback for a failed task.""" + return self.get_task_meta(task_id).get('traceback') + + def get_result(self, task_id): + """Get the result of a task.""" + meta = self.get_task_meta(task_id) + if meta['status'] in self.EXCEPTION_STATES: + return self.exception_to_python(meta['result']) + else: + return meta['result'] + + def get_children(self, task_id): + """Get the list of subtasks sent by a task.""" + try: + return self.get_task_meta(task_id)['children'] + except KeyError: + pass + + def get_task_meta(self, task_id, cache=True): + if cache: + try: + return self._cache[task_id] + except KeyError: + pass + + meta = self._get_task_meta_for(task_id) + if cache and meta.get('status') == states.SUCCESS: + self._cache[task_id] = meta + return meta + + def reload_task_result(self, task_id): + """Reload task result, even if it has been previously fetched.""" + self._cache[task_id] = self.get_task_meta(task_id, cache=False) + + def reload_group_result(self, group_id): + """Reload group result, even if it has been previously fetched.""" + self._cache[group_id] = self.get_group_meta(group_id, cache=False) + + def get_group_meta(self, group_id, cache=True): + if cache: + try: + return self._cache[group_id] + except KeyError: + pass + + meta = self._restore_group(group_id) + if cache and meta is not None: + self._cache[group_id] = meta + return meta + + def restore_group(self, group_id, cache=True): + """Get the result for a group.""" + meta = self.get_group_meta(group_id, cache=cache) + if meta: + return meta['result'] + + def save_group(self, group_id, result): + """Store the result of an executed group.""" + return self._save_group(group_id, result) + + def delete_group(self, group_id): + self._cache.pop(group_id, None) + return self._delete_group(group_id) + + def cleanup(self): + """Backend cleanup. 
Is run by + :class:`celery.task.DeleteExpiredTaskMetaTask`.""" + pass + + def process_cleanup(self): + """Cleanup actions to do at the end of a task worker process.""" + pass + + def on_task_call(self, producer, task_id): + return {} + + def on_chord_part_return(self, task, state, result, propagate=False): + pass + + def fallback_chord_unlock(self, group_id, body, result=None, + countdown=1, **kwargs): + kwargs['result'] = [r.as_tuple() for r in result] + self.app.tasks['celery.chord_unlock'].apply_async( + (group_id, body, ), kwargs, countdown=countdown, + ) + + def apply_chord(self, header, partial_args, group_id, body, **options): + result = header(*partial_args, task_id=group_id) + self.fallback_chord_unlock(group_id, body, **options) + return result + + def current_task_children(self, request=None): + request = request or getattr(current_task(), 'request', None) + if request: + return [r.as_tuple() for r in getattr(request, 'children', [])] + + def __reduce__(self, args=(), kwargs={}): + return (unpickle_backend, (self.__class__, args, kwargs)) +BaseDictBackend = BaseBackend # XXX compat + + +class KeyValueStoreBackend(BaseBackend): + key_t = ensure_bytes + task_keyprefix = 'celery-task-meta-' + group_keyprefix = 'celery-taskset-meta-' + chord_keyprefix = 'chord-unlock-' + implements_incr = False + + def __init__(self, *args, **kwargs): + if hasattr(self.key_t, '__func__'): + self.key_t = self.key_t.__func__ # remove binding + self._encode_prefixes() + super(KeyValueStoreBackend, self).__init__(*args, **kwargs) + if self.implements_incr: + self.apply_chord = self._apply_chord_incr + + def _encode_prefixes(self): + self.task_keyprefix = self.key_t(self.task_keyprefix) + self.group_keyprefix = self.key_t(self.group_keyprefix) + self.chord_keyprefix = self.key_t(self.chord_keyprefix) + + def get(self, key): + raise NotImplementedError('Must implement the get method.') + + def mget(self, keys): + raise NotImplementedError('Does not support get_many') + + def set(self, key, value): + raise NotImplementedError('Must implement the set method.') + + def delete(self, key): + raise NotImplementedError('Must implement the delete method') + + def incr(self, key): + raise NotImplementedError('Does not implement incr') + + def expire(self, key, value): + pass + + def get_key_for_task(self, task_id, key=''): + """Get the cache key for a task by id.""" + key_t = self.key_t + return key_t('').join([ + self.task_keyprefix, key_t(task_id), key_t(key), + ]) + + def get_key_for_group(self, group_id, key=''): + """Get the cache key for a group by id.""" + key_t = self.key_t + return key_t('').join([ + self.group_keyprefix, key_t(group_id), key_t(key), + ]) + + def get_key_for_chord(self, group_id, key=''): + """Get the cache key for the chord waiting on group with given id.""" + key_t = self.key_t + return key_t('').join([ + self.chord_keyprefix, key_t(group_id), key_t(key), + ]) + + def _strip_prefix(self, key): + """Takes bytes, emits string.""" + key = self.key_t(key) + for prefix in self.task_keyprefix, self.group_keyprefix: + if key.startswith(prefix): + return bytes_to_str(key[len(prefix):]) + return bytes_to_str(key) + + def _mget_to_results(self, values, keys): + if hasattr(values, 'items'): + # client returns dict so mapping preserved. + return dict((self._strip_prefix(k), self.decode(v)) + for k, v in items(values) + if v is not None) + else: + # client returns list so need to recreate mapping. 
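+            # values come back positionally, so pair them with the requested keys and skip ids with no result yet.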
+ return dict((bytes_to_str(keys[i]), self.decode(value)) + for i, value in enumerate(values) + if value is not None) + + def get_many(self, task_ids, timeout=None, interval=0.5, no_ack=True, + READY_STATES=states.READY_STATES): + interval = 0.5 if interval is None else interval + ids = task_ids if isinstance(task_ids, set) else set(task_ids) + cached_ids = set() + cache = self._cache + for task_id in ids: + try: + cached = cache[task_id] + except KeyError: + pass + else: + if cached['status'] in READY_STATES: + yield bytes_to_str(task_id), cached + cached_ids.add(task_id) + + ids.difference_update(cached_ids) + iterations = 0 + while ids: + keys = list(ids) + r = self._mget_to_results(self.mget([self.get_key_for_task(k) + for k in keys]), keys) + cache.update(r) + ids.difference_update(set(bytes_to_str(v) for v in r)) + for key, value in items(r): + yield bytes_to_str(key), value + if timeout and iterations * interval >= timeout: + raise TimeoutError('Operation timed out ({0})'.format(timeout)) + time.sleep(interval) # don't busy loop. + iterations += 1 + + def _forget(self, task_id): + self.delete(self.get_key_for_task(task_id)) + + def _store_result(self, task_id, result, status, + traceback=None, request=None, **kwargs): + meta = {'status': status, 'result': result, 'traceback': traceback, + 'children': self.current_task_children(request)} + self.set(self.get_key_for_task(task_id), self.encode(meta)) + return result + + def _save_group(self, group_id, result): + self.set(self.get_key_for_group(group_id), + self.encode({'result': result.as_tuple()})) + return result + + def _delete_group(self, group_id): + self.delete(self.get_key_for_group(group_id)) + + def _get_task_meta_for(self, task_id): + """Get task metadata for a task by id.""" + meta = self.get(self.get_key_for_task(task_id)) + if not meta: + return {'status': states.PENDING, 'result': None} + return self.decode(meta) + + def _restore_group(self, group_id): + """Get task metadata for a task by id.""" + meta = self.get(self.get_key_for_group(group_id)) + # previously this was always pickled, but later this + # was extended to support other serializers, so the + # structure is kind of weird. 
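+        # decode the stored payload and rebuild 'result' into a result object with result_from_tuple().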
+ if meta: + meta = self.decode(meta) + result = meta['result'] + meta['result'] = result_from_tuple(result, self.app) + return meta + + def _apply_chord_incr(self, header, partial_args, group_id, body, + result=None, **options): + self.save_group(group_id, self.app.GroupResult(group_id, result)) + return header(*partial_args, task_id=group_id) + + def on_chord_part_return(self, task, state, result, propagate=None): + if not self.implements_incr: + return + app = self.app + if propagate is None: + propagate = app.conf.CELERY_CHORD_PROPAGATES + gid = task.request.group + if not gid: + return + key = self.get_key_for_chord(gid) + try: + deps = GroupResult.restore(gid, backend=task.backend) + except Exception as exc: + callback = maybe_signature(task.request.chord, app=app) + logger.error('Chord %r raised: %r', gid, exc, exc_info=1) + return self.chord_error_from_stack( + callback, + ChordError('Cannot restore group: {0!r}'.format(exc)), + ) + if deps is None: + try: + raise ValueError(gid) + except ValueError as exc: + callback = maybe_signature(task.request.chord, app=app) + logger.error('Chord callback %r raised: %r', gid, exc, + exc_info=1) + return self.chord_error_from_stack( + callback, + ChordError('GroupResult {0} no longer exists'.format(gid)), + ) + val = self.incr(key) + if val >= len(deps): + callback = maybe_signature(task.request.chord, app=app) + j = deps.join_native if deps.supports_native_join else deps.join + try: + with allow_join_result(): + ret = j(timeout=3.0, propagate=propagate) + except Exception as exc: + try: + culprit = next(deps._failed_join_report()) + reason = 'Dependency {0.id} raised {1!r}'.format( + culprit, exc, + ) + except StopIteration: + reason = repr(exc) + + logger.error('Chord %r raised: %r', gid, reason, exc_info=1) + self.chord_error_from_stack(callback, ChordError(reason)) + else: + try: + callback.delay(ret) + except Exception as exc: + logger.error('Chord %r raised: %r', gid, exc, exc_info=1) + self.chord_error_from_stack( + callback, + ChordError('Callback error: {0!r}'.format(exc)), + ) + finally: + deps.delete() + self.client.delete(key) + else: + self.expire(key, 86400) + + +class DisabledBackend(BaseBackend): + _cache = {} # need this attribute to reset cache in tests. + + def store_result(self, *args, **kwargs): + pass + + def _is_disabled(self, *args, **kwargs): + raise NotImplementedError( + 'No result backend configured. ' + 'Please see the documentation for more information.') + wait_for = get_status = get_result = get_traceback = _is_disabled diff --git a/celery/backends/cache.py b/celery/backends/cache.py new file mode 100644 index 0000000..ac87100 --- /dev/null +++ b/celery/backends/cache.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.cache + ~~~~~~~~~~~~~~~~~~~~~ + + Memcache and in-memory cache result backend. 
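+    Uses pylibmc or python-memcached for the memcached backends, and a simple in-memory LRU client for the ``memory`` backend.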
+ +""" +from __future__ import absolute_import + +import sys + +from kombu.utils import cached_property +from kombu.utils.encoding import bytes_to_str, ensure_bytes + +from celery.exceptions import ImproperlyConfigured +from celery.utils.functional import LRUCache + +from .base import KeyValueStoreBackend + +__all__ = ['CacheBackend'] + +_imp = [None] + +PY3 = sys.version_info[0] == 3 + +REQUIRES_BACKEND = """\ +The memcached backend requires either pylibmc or python-memcached.\ +""" + +UNKNOWN_BACKEND = """\ +The cache backend {0!r} is unknown, +Please use one of the following backends instead: {1}\ +""" + + +def import_best_memcache(): + if _imp[0] is None: + is_pylibmc, memcache_key_t = False, ensure_bytes + try: + import pylibmc as memcache + is_pylibmc = True + except ImportError: + try: + import memcache # noqa + except ImportError: + raise ImproperlyConfigured(REQUIRES_BACKEND) + if PY3: + memcache_key_t = bytes_to_str + _imp[0] = (is_pylibmc, memcache, memcache_key_t) + return _imp[0] + + +def get_best_memcache(*args, **kwargs): + is_pylibmc, memcache, key_t = import_best_memcache() + Client = _Client = memcache.Client + + if not is_pylibmc: + def Client(*args, **kwargs): # noqa + kwargs.pop('behaviors', None) + return _Client(*args, **kwargs) + + return Client, key_t + + +class DummyClient(object): + + def __init__(self, *args, **kwargs): + self.cache = LRUCache(limit=5000) + + def get(self, key, *args, **kwargs): + return self.cache.get(key) + + def get_multi(self, keys): + cache = self.cache + return dict((k, cache[k]) for k in keys if k in cache) + + def set(self, key, value, *args, **kwargs): + self.cache[key] = value + + def delete(self, key, *args, **kwargs): + self.cache.pop(key, None) + + def incr(self, key, delta=1): + return self.cache.incr(key, delta) + + +backends = {'memcache': get_best_memcache, + 'memcached': get_best_memcache, + 'pylibmc': get_best_memcache, + 'memory': lambda: (DummyClient, ensure_bytes)} + + +class CacheBackend(KeyValueStoreBackend): + servers = None + supports_autoexpire = True + supports_native_join = True + implements_incr = True + + def __init__(self, app, expires=None, backend=None, + options={}, url=None, **kwargs): + super(CacheBackend, self).__init__(app, **kwargs) + + self.options = dict(self.app.conf.CELERY_CACHE_BACKEND_OPTIONS, + **options) + + self.backend = url or backend or self.app.conf.CELERY_CACHE_BACKEND + if self.backend: + self.backend, _, servers = self.backend.partition('://') + self.servers = servers.rstrip('/').split(';') + self.expires = self.prepare_expires(expires, type=int) + try: + self.Client, self.key_t = backends[self.backend]() + except KeyError: + raise ImproperlyConfigured(UNKNOWN_BACKEND.format( + self.backend, ', '.join(backends))) + self._encode_prefixes() # rencode the keyprefixes + + def get(self, key): + return self.client.get(key) + + def mget(self, keys): + return self.client.get_multi(keys) + + def set(self, key, value): + return self.client.set(key, value, self.expires) + + def delete(self, key): + return self.client.delete(key) + + def _apply_chord_incr(self, header, partial_args, group_id, body, **opts): + self.client.set(self.get_key_for_chord(group_id), '0', time=86400) + return super(CacheBackend, self)._apply_chord_incr( + header, partial_args, group_id, body, **opts + ) + + def incr(self, key): + return self.client.incr(key) + + @cached_property + def client(self): + return self.Client(self.servers, **self.options) + + def __reduce__(self, args=(), kwargs={}): + servers = ';'.join(self.servers) 
+ backend = '{0}://{1}/'.format(self.backend, servers) + kwargs.update( + dict(backend=backend, + expires=self.expires, + options=self.options)) + return super(CacheBackend, self).__reduce__(args, kwargs) diff --git a/celery/backends/cassandra.py b/celery/backends/cassandra.py new file mode 100644 index 0000000..774e6b7 --- /dev/null +++ b/celery/backends/cassandra.py @@ -0,0 +1,194 @@ +# -* coding: utf-8 -*- +""" + celery.backends.cassandra + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + Apache Cassandra result store backend. + +""" +from __future__ import absolute_import + +try: # pragma: no cover + import pycassa + from thrift import Thrift + C = pycassa.cassandra.ttypes +except ImportError: # pragma: no cover + pycassa = None # noqa + +import socket +import time + +from celery import states +from celery.exceptions import ImproperlyConfigured +from celery.five import monotonic +from celery.utils.log import get_logger +from celery.utils.timeutils import maybe_timedelta, timedelta_seconds + +from .base import BaseBackend + +__all__ = ['CassandraBackend'] + +logger = get_logger(__name__) + + +class CassandraBackend(BaseBackend): + """Highly fault tolerant Cassandra backend. + + .. attribute:: servers + + List of Cassandra servers with format: ``hostname:port``. + + :raises celery.exceptions.ImproperlyConfigured: if + module :mod:`pycassa` is not available. + + """ + servers = [] + keyspace = None + column_family = None + detailed_mode = False + _retry_timeout = 300 + _retry_wait = 3 + supports_autoexpire = True + + def __init__(self, servers=None, keyspace=None, column_family=None, + cassandra_options=None, detailed_mode=False, **kwargs): + """Initialize Cassandra backend. + + Raises :class:`celery.exceptions.ImproperlyConfigured` if + the :setting:`CASSANDRA_SERVERS` setting is not set. + + """ + super(CassandraBackend, self).__init__(**kwargs) + + self.expires = kwargs.get('expires') or maybe_timedelta( + self.app.conf.CELERY_TASK_RESULT_EXPIRES) + + if not pycassa: + raise ImproperlyConfigured( + 'You need to install the pycassa library to use the ' + 'Cassandra backend. 
See https://github.com/pycassa/pycassa') + + conf = self.app.conf + self.servers = (servers or + conf.get('CASSANDRA_SERVERS') or + self.servers) + self.keyspace = (keyspace or + conf.get('CASSANDRA_KEYSPACE') or + self.keyspace) + self.column_family = (column_family or + conf.get('CASSANDRA_COLUMN_FAMILY') or + self.column_family) + self.cassandra_options = dict(conf.get('CASSANDRA_OPTIONS') or {}, + **cassandra_options or {}) + self.detailed_mode = (detailed_mode or + conf.get('CASSANDRA_DETAILED_MODE') or + self.detailed_mode) + read_cons = conf.get('CASSANDRA_READ_CONSISTENCY') or 'LOCAL_QUORUM' + write_cons = conf.get('CASSANDRA_WRITE_CONSISTENCY') or 'LOCAL_QUORUM' + try: + self.read_consistency = getattr(pycassa.ConsistencyLevel, + read_cons) + except AttributeError: + self.read_consistency = pycassa.ConsistencyLevel.LOCAL_QUORUM + try: + self.write_consistency = getattr(pycassa.ConsistencyLevel, + write_cons) + except AttributeError: + self.write_consistency = pycassa.ConsistencyLevel.LOCAL_QUORUM + + if not self.servers or not self.keyspace or not self.column_family: + raise ImproperlyConfigured( + 'Cassandra backend not configured.') + + self._column_family = None + + def _retry_on_error(self, fun, *args, **kwargs): + ts = monotonic() + self._retry_timeout + while 1: + try: + return fun(*args, **kwargs) + except (pycassa.InvalidRequestException, + pycassa.TimedOutException, + pycassa.UnavailableException, + pycassa.AllServersUnavailable, + socket.error, + socket.timeout, + Thrift.TException) as exc: + if monotonic() > ts: + raise + logger.warning('Cassandra error: %r. Retrying...', exc) + time.sleep(self._retry_wait) + + def _get_column_family(self): + if self._column_family is None: + conn = pycassa.ConnectionPool(self.keyspace, + server_list=self.servers, + **self.cassandra_options) + self._column_family = pycassa.ColumnFamily( + conn, self.column_family, + read_consistency_level=self.read_consistency, + write_consistency_level=self.write_consistency, + ) + return self._column_family + + def process_cleanup(self): + if self._column_family is not None: + self._column_family = None + + def _store_result(self, task_id, result, status, + traceback=None, request=None, **kwargs): + """Store return value and status of an executed task.""" + + def _do_store(): + cf = self._get_column_family() + date_done = self.app.now() + meta = {'status': status, + 'date_done': date_done.strftime('%Y-%m-%dT%H:%M:%SZ'), + 'traceback': self.encode(traceback), + 'children': self.encode( + self.current_task_children(request), + )} + if self.detailed_mode: + meta['result'] = result + cf.insert(task_id, {date_done: self.encode(meta)}, + ttl=self.expires and timedelta_seconds(self.expires)) + else: + meta['result'] = self.encode(result) + cf.insert(task_id, meta, + ttl=self.expires and timedelta_seconds(self.expires)) + + return self._retry_on_error(_do_store) + + def _get_task_meta_for(self, task_id): + """Get task metadata for a task by id.""" + + def _do_get(): + cf = self._get_column_family() + try: + if self.detailed_mode: + row = cf.get(task_id, column_reversed=True, column_count=1) + meta = self.decode(list(row.values())[0]) + meta['task_id'] = task_id + else: + obj = cf.get(task_id) + meta = { + 'task_id': task_id, + 'status': obj['status'], + 'result': self.decode(obj['result']), + 'date_done': obj['date_done'], + 'traceback': self.decode(obj['traceback']), + 'children': self.decode(obj['children']), + } + except (KeyError, pycassa.NotFoundException): + meta = {'status': states.PENDING, 'result': 
None} + return meta + + return self._retry_on_error(_do_get) + + def __reduce__(self, args=(), kwargs={}): + kwargs.update( + dict(servers=self.servers, + keyspace=self.keyspace, + column_family=self.column_family, + cassandra_options=self.cassandra_options)) + return super(CassandraBackend, self).__reduce__(args, kwargs) diff --git a/celery/backends/couchbase.py b/celery/backends/couchbase.py new file mode 100644 index 0000000..2d51b80 --- /dev/null +++ b/celery/backends/couchbase.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.couchbase + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + CouchBase result store backend. + +""" +from __future__ import absolute_import + +import logging + +try: + from couchbase import Couchbase + from couchbase.connection import Connection + from couchbase.exceptions import NotFoundError +except ImportError: + Couchbase = Connection = NotFoundError = None # noqa + +from kombu.utils.url import _parse_url + +from celery.exceptions import ImproperlyConfigured +from celery.utils.timeutils import maybe_timedelta + +from .base import KeyValueStoreBackend + +__all__ = ['CouchBaseBackend'] + + +class CouchBaseBackend(KeyValueStoreBackend): + bucket = 'default' + host = 'localhost' + port = 8091 + username = None + password = None + quiet = False + conncache = None + unlock_gil = True + timeout = 2.5 + transcoder = None + # supports_autoexpire = False + + def __init__(self, url=None, *args, **kwargs): + """Initialize CouchBase backend instance. + + :raises celery.exceptions.ImproperlyConfigured: if + module :mod:`couchbase` is not available. + + """ + super(CouchBaseBackend, self).__init__(*args, **kwargs) + + self.expires = kwargs.get('expires') or maybe_timedelta( + self.app.conf.CELERY_TASK_RESULT_EXPIRES) + + if Couchbase is None: + raise ImproperlyConfigured( + 'You need to install the couchbase library to use the ' + 'CouchBase backend.', + ) + + uhost = uport = uname = upass = ubucket = None + if url: + _, uhost, uport, uname, upass, ubucket, _ = _parse_url(url) + ubucket = ubucket.strip('/') if ubucket else None + + config = self.app.conf.get('CELERY_COUCHBASE_BACKEND_SETTINGS', None) + if config is not None: + if not isinstance(config, dict): + raise ImproperlyConfigured( + 'Couchbase backend settings should be grouped in a dict', + ) + else: + config = {} + + self.host = uhost or config.get('host', self.host) + self.port = int(uport or config.get('port', self.port)) + self.bucket = ubucket or config.get('bucket', self.bucket) + self.username = uname or config.get('username', self.username) + self.password = upass or config.get('password', self.password) + + self._connection = None + + def _get_connection(self): + """Connect to the Couchbase server.""" + if self._connection is None: + kwargs = {'bucket': self.bucket, 'host': self.host} + + if self.port: + kwargs.update({'port': self.port}) + if self.username: + kwargs.update({'username': self.username}) + if self.password: + kwargs.update({'password': self.password}) + + logging.debug('couchbase settings %r', kwargs) + self._connection = Connection(**kwargs) + return self._connection + + @property + def connection(self): + return self._get_connection() + + def get(self, key): + try: + return self.connection.get(key).value + except NotFoundError: + return None + + def set(self, key, value): + self.connection.set(key, value) + + def mget(self, keys): + return [self.get(key) for key in keys] + + def delete(self, key): + self.connection.delete(key) diff --git a/celery/backends/database/__init__.py 
b/celery/backends/database/__init__.py new file mode 100644 index 0000000..c52e758 --- /dev/null +++ b/celery/backends/database/__init__.py @@ -0,0 +1,201 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.database + ~~~~~~~~~~~~~~~~~~~~~~~~ + + SQLAlchemy result store backend. + +""" +from __future__ import absolute_import + +import logging +from contextlib import contextmanager +from functools import wraps + +from celery import states +from celery.backends.base import BaseBackend +from celery.exceptions import ImproperlyConfigured +from celery.five import range +from celery.utils.timeutils import maybe_timedelta + +from .models import Task +from .models import TaskSet +from .session import SessionManager + +logger = logging.getLogger(__name__) + +__all__ = ['DatabaseBackend'] + + +def _sqlalchemy_installed(): + try: + import sqlalchemy + except ImportError: + raise ImproperlyConfigured( + 'The database result backend requires SQLAlchemy to be installed.' + 'See http://pypi.python.org/pypi/SQLAlchemy') + return sqlalchemy +_sqlalchemy_installed() + +from sqlalchemy.exc import DatabaseError, InvalidRequestError +from sqlalchemy.orm.exc import StaleDataError + + +@contextmanager +def session_cleanup(session): + try: + yield + except Exception: + session.rollback() + raise + finally: + session.close() + + +def retry(fun): + + @wraps(fun) + def _inner(*args, **kwargs): + max_retries = kwargs.pop('max_retries', 3) + + for retries in range(max_retries): + try: + return fun(*args, **kwargs) + except (DatabaseError, InvalidRequestError, StaleDataError): + logger.warning( + "Failed operation %s. Retrying %s more times.", + fun.__name__, max_retries - retries - 1, + exc_info=True, + ) + if retries + 1 >= max_retries: + raise + + return _inner + + +class DatabaseBackend(BaseBackend): + """The database result backend.""" + # ResultSet.iterate should sleep this much between each pool, + # to not bombard the database with queries. + subpolling_interval = 0.5 + + def __init__(self, dburi=None, expires=None, + engine_options=None, url=None, **kwargs): + # The `url` argument was added later and is used by + # the app to set backend by url (celery.backends.get_backend_by_url) + super(DatabaseBackend, self).__init__(**kwargs) + conf = self.app.conf + self.expires = maybe_timedelta(self.prepare_expires(expires)) + self.dburi = url or dburi or conf.CELERY_RESULT_DBURI + self.engine_options = dict( + engine_options or {}, + **conf.CELERY_RESULT_ENGINE_OPTIONS or {}) + self.short_lived_sessions = kwargs.get( + 'short_lived_sessions', + conf.CELERY_RESULT_DB_SHORT_LIVED_SESSIONS, + ) + + tablenames = conf.CELERY_RESULT_DB_TABLENAMES or {} + Task.__table__.name = tablenames.get('task', 'celery_taskmeta') + TaskSet.__table__.name = tablenames.get('group', 'celery_tasksetmeta') + + if not self.dburi: + raise ImproperlyConfigured( + 'Missing connection string! 
Do you have ' + 'CELERY_RESULT_DBURI set to a real value?') + + def ResultSession(self, session_manager=SessionManager()): + return session_manager.session_factory( + dburi=self.dburi, + short_lived_sessions=self.short_lived_sessions, + **self.engine_options + ) + + @retry + def _store_result(self, task_id, result, status, + traceback=None, max_retries=3, **kwargs): + """Store return value and status of an executed task.""" + session = self.ResultSession() + with session_cleanup(session): + task = list(session.query(Task).filter(Task.task_id == task_id)) + task = task and task[0] + if not task: + task = Task(task_id) + session.add(task) + session.flush() + task.result = result + task.status = status + task.traceback = traceback + session.commit() + return result + + @retry + def _get_task_meta_for(self, task_id): + """Get task metadata for a task by id.""" + session = self.ResultSession() + with session_cleanup(session): + task = list(session.query(Task).filter(Task.task_id == task_id)) + task = task and task[0] + if not task: + task = Task(task_id) + task.status = states.PENDING + task.result = None + return task.to_dict() + + @retry + def _save_group(self, group_id, result): + """Store the result of an executed group.""" + session = self.ResultSession() + with session_cleanup(session): + group = TaskSet(group_id, result) + session.add(group) + session.flush() + session.commit() + return result + + @retry + def _restore_group(self, group_id): + """Get metadata for group by id.""" + session = self.ResultSession() + with session_cleanup(session): + group = session.query(TaskSet).filter( + TaskSet.taskset_id == group_id).first() + if group: + return group.to_dict() + + @retry + def _delete_group(self, group_id): + """Delete metadata for group by id.""" + session = self.ResultSession() + with session_cleanup(session): + session.query(TaskSet).filter( + TaskSet.taskset_id == group_id).delete() + session.flush() + session.commit() + + @retry + def _forget(self, task_id): + """Forget about result.""" + session = self.ResultSession() + with session_cleanup(session): + session.query(Task).filter(Task.task_id == task_id).delete() + session.commit() + + def cleanup(self): + """Delete expired metadata.""" + session = self.ResultSession() + expires = self.expires + now = self.app.now() + with session_cleanup(session): + session.query(Task).filter( + Task.date_done < (now - expires)).delete() + session.query(TaskSet).filter( + TaskSet.date_done < (now - expires)).delete() + session.commit() + + def __reduce__(self, args=(), kwargs={}): + kwargs.update( + dict(dburi=self.dburi, + expires=self.expires, + engine_options=self.engine_options)) + return super(DatabaseBackend, self).__reduce__(args, kwargs) diff --git a/celery/backends/database/models.py b/celery/backends/database/models.py new file mode 100644 index 0000000..2802a00 --- /dev/null +++ b/celery/backends/database/models.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.database.models + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Database tables for the SQLAlchemy result store backend. 
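+    Defines :class:`Task` (table ``celery_taskmeta``) and :class:`TaskSet` (table ``celery_tasksetmeta``).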
+ +""" +from __future__ import absolute_import + +from datetime import datetime + +import sqlalchemy as sa +from sqlalchemy.types import PickleType + +from celery import states + +from .session import ResultModelBase + +__all__ = ['Task', 'TaskSet'] + + +class Task(ResultModelBase): + """Task result/status.""" + __tablename__ = 'celery_taskmeta' + __table_args__ = {'sqlite_autoincrement': True} + + id = sa.Column(sa.Integer, sa.Sequence('task_id_sequence'), + primary_key=True, + autoincrement=True) + task_id = sa.Column(sa.String(255), unique=True) + status = sa.Column(sa.String(50), default=states.PENDING) + result = sa.Column(PickleType, nullable=True) + date_done = sa.Column(sa.DateTime, default=datetime.utcnow, + onupdate=datetime.utcnow, nullable=True) + traceback = sa.Column(sa.Text, nullable=True) + + def __init__(self, task_id): + self.task_id = task_id + + def to_dict(self): + return {'task_id': self.task_id, + 'status': self.status, + 'result': self.result, + 'traceback': self.traceback, + 'date_done': self.date_done} + + def __repr__(self): + return ''.format(self) + + +class TaskSet(ResultModelBase): + """TaskSet result""" + __tablename__ = 'celery_tasksetmeta' + __table_args__ = {'sqlite_autoincrement': True} + + id = sa.Column(sa.Integer, sa.Sequence('taskset_id_sequence'), + autoincrement=True, primary_key=True) + taskset_id = sa.Column(sa.String(255), unique=True) + result = sa.Column(PickleType, nullable=True) + date_done = sa.Column(sa.DateTime, default=datetime.utcnow, + nullable=True) + + def __init__(self, taskset_id, result): + self.taskset_id = taskset_id + self.result = result + + def to_dict(self): + return {'taskset_id': self.taskset_id, + 'result': self.result, + 'date_done': self.date_done} + + def __repr__(self): + return ''.format(self) diff --git a/celery/backends/database/session.py b/celery/backends/database/session.py new file mode 100644 index 0000000..1575d7f --- /dev/null +++ b/celery/backends/database/session.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.database.session + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + SQLAlchemy sessions. 
+ +""" +from __future__ import absolute_import + +from billiard.util import register_after_fork + +from sqlalchemy import create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import NullPool + +ResultModelBase = declarative_base() + +__all__ = ['SessionManager'] + + +class SessionManager(object): + def __init__(self): + self._engines = {} + self._sessions = {} + self.forked = False + self.prepared = False + register_after_fork(self, self._after_fork) + + def _after_fork(self,): + self.forked = True + + def get_engine(self, dburi, **kwargs): + if self.forked: + try: + return self._engines[dburi] + except KeyError: + engine = self._engines[dburi] = create_engine(dburi, **kwargs) + return engine + else: + kwargs['poolclass'] = NullPool + return create_engine(dburi, **kwargs) + + def create_session(self, dburi, short_lived_sessions=False, **kwargs): + engine = self.get_engine(dburi, **kwargs) + if self.forked: + if short_lived_sessions or dburi not in self._sessions: + self._sessions[dburi] = sessionmaker(bind=engine) + return engine, self._sessions[dburi] + else: + return engine, sessionmaker(bind=engine) + + def prepare_models(self, engine): + if not self.prepared: + ResultModelBase.metadata.create_all(engine) + self.prepared = True + + def session_factory(self, dburi, **kwargs): + engine, session = self.create_session(dburi, **kwargs) + self.prepare_models(engine) + return session() diff --git a/celery/backends/mongodb.py b/celery/backends/mongodb.py new file mode 100644 index 0000000..44c1c22 --- /dev/null +++ b/celery/backends/mongodb.py @@ -0,0 +1,241 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.mongodb + ~~~~~~~~~~~~~~~~~~~~~~~ + + MongoDB result store backend. + +""" +from __future__ import absolute_import + +from datetime import datetime + +try: + import pymongo +except ImportError: # pragma: no cover + pymongo = None # noqa + +if pymongo: + try: + from bson.binary import Binary + except ImportError: # pragma: no cover + from pymongo.binary import Binary # noqa +else: # pragma: no cover + Binary = None # noqa + +from kombu.syn import detect_environment +from kombu.utils import cached_property + +from celery import states +from celery.exceptions import ImproperlyConfigured +from celery.five import string_t +from celery.utils.timeutils import maybe_timedelta + +from .base import BaseBackend + +__all__ = ['MongoBackend'] + + +class Bunch(object): + + def __init__(self, **kw): + self.__dict__.update(kw) + + +class MongoBackend(BaseBackend): + host = 'localhost' + port = 27017 + user = None + password = None + database_name = 'celery' + taskmeta_collection = 'celery_taskmeta' + max_pool_size = 10 + options = None + + supports_autoexpire = False + + _connection = None + + def __init__(self, *args, **kwargs): + """Initialize MongoDB backend instance. + + :raises celery.exceptions.ImproperlyConfigured: if + module :mod:`pymongo` is not available. 
+ + """ + self.options = {} + super(MongoBackend, self).__init__(*args, **kwargs) + self.expires = kwargs.get('expires') or maybe_timedelta( + self.app.conf.CELERY_TASK_RESULT_EXPIRES) + + if not pymongo: + raise ImproperlyConfigured( + 'You need to install the pymongo library to use the ' + 'MongoDB backend.') + + config = self.app.conf.get('CELERY_MONGODB_BACKEND_SETTINGS') + if config is not None: + if not isinstance(config, dict): + raise ImproperlyConfigured( + 'MongoDB backend settings should be grouped in a dict') + config = dict(config) # do not modify original + + self.host = config.pop('host', self.host) + self.port = int(config.pop('port', self.port)) + self.user = config.pop('user', self.user) + self.password = config.pop('password', self.password) + self.database_name = config.pop('database', self.database_name) + self.taskmeta_collection = config.pop( + 'taskmeta_collection', self.taskmeta_collection, + ) + + self.options = dict(config, **config.pop('options', None) or {}) + + # Set option defaults + self.options.setdefault('max_pool_size', self.max_pool_size) + self.options.setdefault('auto_start_request', False) + + url = kwargs.get('url') + if url: + # Specifying backend as an URL + self.host = url + + def _get_connection(self): + """Connect to the MongoDB server.""" + if self._connection is None: + from pymongo import MongoClient + + # The first pymongo.Connection() argument (host) can be + # a list of ['host:port'] elements or a mongodb connection + # URI. If this is the case, don't use self.port + # but let pymongo get the port(s) from the URI instead. + # This enables the use of replica sets and sharding. + # See pymongo.Connection() for more info. + url = self.host + if isinstance(url, string_t) \ + and not url.startswith('mongodb://'): + url = 'mongodb://{0}:{1}'.format(url, self.port) + if url == 'mongodb://': + url = url + 'localhost' + if detect_environment() != 'default': + self.options['use_greenlets'] = True + self._connection = MongoClient(host=url, **self.options) + + return self._connection + + def process_cleanup(self): + if self._connection is not None: + # MongoDB connection will be closed automatically when object + # goes out of scope + del(self.collection) + del(self.database) + self._connection = None + + def _store_result(self, task_id, result, status, + traceback=None, request=None, **kwargs): + """Store return value and status of an executed task.""" + meta = {'_id': task_id, + 'status': status, + 'result': Binary(self.encode(result)), + 'date_done': datetime.utcnow(), + 'traceback': Binary(self.encode(traceback)), + 'children': Binary(self.encode( + self.current_task_children(request), + ))} + self.collection.save(meta) + + return result + + def _get_task_meta_for(self, task_id): + """Get task metadata for a task by id.""" + + obj = self.collection.find_one({'_id': task_id}) + if not obj: + return {'status': states.PENDING, 'result': None} + + meta = { + 'task_id': obj['_id'], + 'status': obj['status'], + 'result': self.decode(obj['result']), + 'date_done': obj['date_done'], + 'traceback': self.decode(obj['traceback']), + 'children': self.decode(obj['children']), + } + + return meta + + def _save_group(self, group_id, result): + """Save the group result.""" + meta = {'_id': group_id, + 'result': Binary(self.encode(result)), + 'date_done': datetime.utcnow()} + self.collection.save(meta) + + return result + + def _restore_group(self, group_id): + """Get the result for a group by id.""" + obj = self.collection.find_one({'_id': group_id}) + if not 
obj: + return + + meta = { + 'task_id': obj['_id'], + 'result': self.decode(obj['result']), + 'date_done': obj['date_done'], + } + + return meta + + def _delete_group(self, group_id): + """Delete a group by id.""" + self.collection.remove({'_id': group_id}) + + def _forget(self, task_id): + """ + Remove result from MongoDB. + + :raises celery.exceptions.OperationsError: if the task_id could not be + removed. + """ + # By using safe=True, this will wait until it receives a response from + # the server. Likewise, it will raise an OperationsError if the + # response was unable to be completed. + self.collection.remove({'_id': task_id}) + + def cleanup(self): + """Delete expired metadata.""" + self.collection.remove( + {'date_done': {'$lt': self.app.now() - self.expires}}, + ) + + def __reduce__(self, args=(), kwargs={}): + kwargs.update( + dict(expires=self.expires)) + return super(MongoBackend, self).__reduce__(args, kwargs) + + def _get_database(self): + conn = self._get_connection() + db = conn[self.database_name] + if self.user and self.password: + if not db.authenticate(self.user, + self.password): + raise ImproperlyConfigured( + 'Invalid MongoDB username or password.') + return db + + @cached_property + def database(self): + """Get database from MongoDB connection and perform authentication + if necessary.""" + return self._get_database() + + @cached_property + def collection(self): + """Get the metadata task collection.""" + collection = self.database[self.taskmeta_collection] + + # Ensure an index on date_done is there, if not process the index + # in the background. Once completed cleanup will be much faster + collection.ensure_index('date_done', background='true') + return collection diff --git a/celery/backends/redis.py b/celery/backends/redis.py new file mode 100644 index 0000000..314b1d2 --- /dev/null +++ b/celery/backends/redis.py @@ -0,0 +1,271 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.redis + ~~~~~~~~~~~~~~~~~~~~~ + + Redis result store backend. + +""" +from __future__ import absolute_import + +from functools import partial + +from kombu.utils import cached_property, retry_over_time +from kombu.utils.url import _parse_url + +from celery import states +from celery.canvas import maybe_signature +from celery.exceptions import ChordError, ImproperlyConfigured +from celery.five import string_t +from celery.utils import deprecated_property, strtobool +from celery.utils.functional import dictfilter +from celery.utils.log import get_logger +from celery.utils.timeutils import humanize_seconds + +from .base import KeyValueStoreBackend + +try: + import redis + from redis.exceptions import ConnectionError + from kombu.transport.redis import get_redis_error_classes +except ImportError: # pragma: no cover + redis = None # noqa + ConnectionError = None # noqa + get_redis_error_classes = None # noqa + +__all__ = ['RedisBackend'] + +REDIS_MISSING = """\ +You need to install the redis library in order to use \ +the Redis result store backend.""" + +logger = get_logger(__name__) +error = logger.error + + +class RedisBackend(KeyValueStoreBackend): + """Redis task result store.""" + + #: redis-py client module. + redis = redis + + #: Maximium number of connections in the pool. 
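+    #: (Can also be set with the CELERY_REDIS_MAX_CONNECTIONS setting, see __init__ below.)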
+ max_connections = None + + supports_autoexpire = True + supports_native_join = True + implements_incr = True + + def __init__(self, host=None, port=None, db=None, password=None, + expires=None, max_connections=None, url=None, + connection_pool=None, new_join=False, **kwargs): + super(RedisBackend, self).__init__(**kwargs) + conf = self.app.conf + if self.redis is None: + raise ImproperlyConfigured(REDIS_MISSING) + + # For compatibility with the old REDIS_* configuration keys. + def _get(key): + for prefix in 'CELERY_REDIS_{0}', 'REDIS_{0}': + try: + return conf[prefix.format(key)] + except KeyError: + pass + if host and '://' in host: + url = host + host = None + + self.max_connections = ( + max_connections or _get('MAX_CONNECTIONS') or self.max_connections + ) + self._ConnectionPool = connection_pool + + self.connparams = { + 'host': _get('HOST') or 'localhost', + 'port': _get('PORT') or 6379, + 'db': _get('DB') or 0, + 'password': _get('PASSWORD'), + 'max_connections': max_connections, + } + if url: + self.connparams = self._params_from_url(url, self.connparams) + self.url = url + self.expires = self.prepare_expires(expires, type=int) + + try: + new_join = strtobool(self.connparams.pop('new_join')) + except KeyError: + pass + if new_join: + self.apply_chord = self._new_chord_apply + self.on_chord_part_return = self._new_chord_return + + self.connection_errors, self.channel_errors = ( + get_redis_error_classes() if get_redis_error_classes + else ((), ())) + + def _params_from_url(self, url, defaults): + scheme, host, port, user, password, path, query = _parse_url(url) + connparams = dict( + defaults, **dictfilter({ + 'host': host, 'port': port, 'password': password, + 'db': query.pop('virtual_host', None)}) + ) + + if scheme == 'socket': + # use 'path' as path to the socket… in this case + # the database number should be given in 'query' + connparams.update({ + 'connection_class': self.redis.UnixDomainSocketConnection, + 'path': '/' + path, + }) + # host+port are invalid options when using this connection type. + connparams.pop('host', None) + connparams.pop('port', None) + else: + connparams['db'] = path + + # db may be string and start with / like in kombu. 
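+        # e.g. a db value of '/1' (or '1') is normalized to the integer 1.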
+ db = connparams.get('db') or 0 + db = db.strip('/') if isinstance(db, string_t) else db + connparams['db'] = int(db) + + # Query parameters override other parameters + connparams.update(query) + return connparams + + def get(self, key): + return self.client.get(key) + + def mget(self, keys): + return self.client.mget(keys) + + def ensure(self, fun, args, **policy): + retry_policy = dict(self.retry_policy, **policy) + max_retries = retry_policy.get('max_retries') + return retry_over_time( + fun, self.connection_errors, args, {}, + partial(self.on_connection_error, max_retries), + **retry_policy + ) + + def on_connection_error(self, max_retries, exc, intervals, retries): + tts = next(intervals) + error('Connection to Redis lost: Retry (%s/%s) %s.', + retries, max_retries or 'Inf', + humanize_seconds(tts, 'in ')) + return tts + + def set(self, key, value, **retry_policy): + return self.ensure(self._set, (key, value), **retry_policy) + + def _set(self, key, value): + pipe = self.client.pipeline() + if self.expires: + pipe.setex(key, value, self.expires) + else: + pipe.set(key, value) + pipe.publish(key, value) + pipe.execute() + + def delete(self, key): + self.client.delete(key) + + def incr(self, key): + return self.client.incr(key) + + def expire(self, key, value): + return self.client.expire(key, value) + + def _unpack_chord_result(self, tup, decode, + PROPAGATE_STATES=states.PROPAGATE_STATES): + _, tid, state, retval = decode(tup) + if state in PROPAGATE_STATES: + raise ChordError('Dependency {0} raised {1!r}'.format(tid, retval)) + return retval + + def _new_chord_apply(self, header, partial_args, group_id, body, + result=None, **options): + # avoids saving the group in the redis db. + return header(*partial_args, task_id=group_id) + + def _new_chord_return(self, task, state, result, propagate=None, + PROPAGATE_STATES=states.PROPAGATE_STATES): + app = self.app + if propagate is None: + propagate = self.app.conf.CELERY_CHORD_PROPAGATES + request = task.request + tid, gid = request.id, request.group + if not gid or not tid: + return + + client = self.client + jkey = self.get_key_for_group(gid, '.j') + result = self.encode_result(result, state) + _, readycount, _ = client.pipeline() \ + .rpush(jkey, self.encode([1, tid, state, result])) \ + .llen(jkey) \ + .expire(jkey, 86400) \ + .execute() + + try: + callback = maybe_signature(request.chord, app=app) + total = callback['chord_size'] + if readycount >= total: + decode, unpack = self.decode, self._unpack_chord_result + resl, _ = client.pipeline() \ + .lrange(jkey, 0, total) \ + .delete(jkey) \ + .execute() + try: + callback.delay([unpack(tup, decode) for tup in resl]) + except Exception as exc: + error('Chord callback for %r raised: %r', + request.group, exc, exc_info=1) + app._tasks[callback.task].backend.fail_from_current_stack( + callback.id, + exc=ChordError('Callback error: {0!r}'.format(exc)), + ) + except ChordError as exc: + error('Chord %r raised: %r', request.group, exc, exc_info=1) + app._tasks[callback.task].backend.fail_from_current_stack( + callback.id, exc=exc, + ) + except Exception as exc: + error('Chord %r raised: %r', request.group, exc, exc_info=1) + app._tasks[callback.task].backend.fail_from_current_stack( + callback.id, exc=ChordError('Join error: {0!r}'.format(exc)), + ) + + @property + def ConnectionPool(self): + if self._ConnectionPool is None: + self._ConnectionPool = self.redis.ConnectionPool + return self._ConnectionPool + + @cached_property + def client(self): + return self.redis.Redis( + 
connection_pool=self.ConnectionPool(**self.connparams), + ) + + def __reduce__(self, args=(), kwargs={}): + return super(RedisBackend, self).__reduce__( + (self.url, ), {'expires': self.expires}, + ) + + @deprecated_property(3.2, 3.3) + def host(self): + return self.connparams['host'] + + @deprecated_property(3.2, 3.3) + def port(self): + return self.connparams['port'] + + @deprecated_property(3.2, 3.3) + def db(self): + return self.connparams['db'] + + @deprecated_property(3.2, 3.3) + def password(self): + return self.connparams['password'] diff --git a/celery/backends/rpc.py b/celery/backends/rpc.py new file mode 100644 index 0000000..28d5426 --- /dev/null +++ b/celery/backends/rpc.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +""" + celery.backends.rpc + ~~~~~~~~~~~~~~~~~~~ + + RPC-style result backend, using reply-to and one queue per client. + +""" +from __future__ import absolute_import + +from kombu import Consumer, Exchange +from kombu.common import maybe_declare +from kombu.utils import cached_property + +from celery import current_task +from celery.backends import amqp + +__all__ = ['RPCBackend'] + + +class RPCBackend(amqp.AMQPBackend): + persistent = False + + class Consumer(Consumer): + auto_declare = False + + def _create_exchange(self, name, type='direct', delivery_mode=2): + # uses direct to queue routing (anon exchange). + return Exchange(None) + + def on_task_call(self, producer, task_id): + maybe_declare(self.binding(producer.channel), retry=True) + + def _create_binding(self, task_id): + return self.binding + + def _many_bindings(self, ids): + return [self.binding] + + def rkey(self, task_id): + return task_id + + def destination_for(self, task_id, request): + # Request is a new argument for backends, so must still support + # old code that rely on current_task + try: + request = request or current_task.request + except AttributeError: + raise RuntimeError( + 'RPC backend missing task request for {0!r}'.format(task_id), + ) + return request.reply_to, request.correlation_id or task_id + + def on_reply_declare(self, task_id): + pass + + @property + def binding(self): + return self.Queue(self.oid, self.exchange, self.oid, + durable=False, auto_delete=False) + + @cached_property + def oid(self): + return self.app.oid diff --git a/celery/beat.py b/celery/beat.py new file mode 100644 index 0000000..00799c6 --- /dev/null +++ b/celery/beat.py @@ -0,0 +1,548 @@ +# -*- coding: utf-8 -*- +""" + celery.beat + ~~~~~~~~~~~ + + The periodic task scheduler. + +""" +from __future__ import absolute_import + +import errno +import os +import time +import shelve +import sys +import traceback + +from threading import Event, Thread + +from billiard import Process, ensure_multiprocessing +from billiard.common import reset_signals +from kombu.utils import cached_property, reprcall +from kombu.utils.functional import maybe_evaluate + +from . import __version__ +from . import platforms +from . 
import signals +from .five import items, reraise, values, monotonic +from .schedules import maybe_schedule, crontab +from .utils.imports import instantiate +from .utils.timeutils import humanize_seconds +from .utils.log import get_logger, iter_open_logger_fds + +__all__ = ['SchedulingError', 'ScheduleEntry', 'Scheduler', + 'PersistentScheduler', 'Service', 'EmbeddedService'] + +logger = get_logger(__name__) +debug, info, error, warning = (logger.debug, logger.info, + logger.error, logger.warning) + +DEFAULT_MAX_INTERVAL = 300 # 5 minutes + + +class SchedulingError(Exception): + """An error occured while scheduling a task.""" + + +class ScheduleEntry(object): + """An entry in the scheduler. + + :keyword name: see :attr:`name`. + :keyword schedule: see :attr:`schedule`. + :keyword args: see :attr:`args`. + :keyword kwargs: see :attr:`kwargs`. + :keyword options: see :attr:`options`. + :keyword last_run_at: see :attr:`last_run_at`. + :keyword total_run_count: see :attr:`total_run_count`. + :keyword relative: Is the time relative to when the server starts? + + """ + + #: The task name + name = None + + #: The schedule (run_every/crontab) + schedule = None + + #: Positional arguments to apply. + args = None + + #: Keyword arguments to apply. + kwargs = None + + #: Task execution options. + options = None + + #: The time and date of when this task was last scheduled. + last_run_at = None + + #: Total number of times this task has been scheduled. + total_run_count = 0 + + def __init__(self, name=None, task=None, last_run_at=None, + total_run_count=None, schedule=None, args=(), kwargs={}, + options={}, relative=False, app=None): + self.app = app + self.name = name + self.task = task + self.args = args + self.kwargs = kwargs + self.options = options + self.schedule = maybe_schedule(schedule, relative, app=self.app) + self.last_run_at = last_run_at or self._default_now() + self.total_run_count = total_run_count or 0 + + def _default_now(self): + return self.schedule.now() if self.schedule else self.app.now() + + def _next_instance(self, last_run_at=None): + """Return a new instance of the same class, but with + its date and count fields updated.""" + return self.__class__(**dict( + self, + last_run_at=last_run_at or self._default_now(), + total_run_count=self.total_run_count + 1, + )) + __next__ = next = _next_instance # for 2to3 + + def __reduce__(self): + return self.__class__, ( + self.name, self.task, self.last_run_at, self.total_run_count, + self.schedule, self.args, self.kwargs, self.options, + ) + + def update(self, other): + """Update values from another entry. + + Does only update "editable" fields (task, schedule, args, kwargs, + options). + + """ + self.__dict__.update({'task': other.task, 'schedule': other.schedule, + 'args': other.args, 'kwargs': other.kwargs, + 'options': other.options}) + + def is_due(self): + """See :meth:`~celery.schedule.schedule.is_due`.""" + return self.schedule.is_due(self.last_run_at) + + def __iter__(self): + return iter(items(vars(self))) + + def __repr__(self): + return '%s', entry.task, result.id) + return next_time_to_run + + def tick(self): + """Run a tick, that is one iteration of the scheduler. + + Executes all due tasks. 
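+
+        Returns the preferred number of seconds to sleep before the
+        next tick, which is never more than :attr:`max_interval`.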
+ + """ + remaining_times = [] + try: + for entry in values(self.schedule): + next_time_to_run = self.maybe_due(entry, self.publisher) + if next_time_to_run: + remaining_times.append(next_time_to_run) + except RuntimeError: + pass + + return min(remaining_times + [self.max_interval]) + + def should_sync(self): + return ( + (not self._last_sync or + (monotonic() - self._last_sync) > self.sync_every) or + (self.sync_every_tasks and + self._tasks_since_sync >= self.sync_every_tasks) + ) + + def reserve(self, entry): + new_entry = self.schedule[entry.name] = next(entry) + return new_entry + + def apply_async(self, entry, publisher=None, **kwargs): + # Update timestamps and run counts before we actually execute, + # so we have that done if an exception is raised (doesn't schedule + # forever.) + entry = self.reserve(entry) + task = self.app.tasks.get(entry.task) + + try: + if task: + result = task.apply_async(entry.args, entry.kwargs, + publisher=publisher, + **entry.options) + else: + result = self.send_task(entry.task, entry.args, entry.kwargs, + publisher=publisher, + **entry.options) + except Exception as exc: + reraise(SchedulingError, SchedulingError( + "Couldn't apply scheduled task {0.name}: {exc}".format( + entry, exc=exc)), sys.exc_info()[2]) + finally: + self._tasks_since_sync += 1 + if self.should_sync(): + self._do_sync() + return result + + def send_task(self, *args, **kwargs): + return self.app.send_task(*args, **kwargs) + + def setup_schedule(self): + self.install_default_entries(self.data) + + def _do_sync(self): + try: + debug('beat: Synchronizing schedule...') + self.sync() + finally: + self._last_sync = monotonic() + self._tasks_since_sync = 0 + + def sync(self): + pass + + def close(self): + self.sync() + + def add(self, **kwargs): + entry = self.Entry(app=self.app, **kwargs) + self.schedule[entry.name] = entry + return entry + + def _maybe_entry(self, name, entry): + if isinstance(entry, self.Entry): + entry.app = self.app + return entry + return self.Entry(**dict(entry, name=name, app=self.app)) + + def update_from_dict(self, dict_): + self.schedule.update(dict( + (name, self._maybe_entry(name, entry)) + for name, entry in items(dict_))) + + def merge_inplace(self, b): + schedule = self.schedule + A, B = set(schedule), set(b) + + # Remove items from disk not in the schedule anymore. + for key in A ^ B: + schedule.pop(key, None) + + # Update and add new items in the schedule + for key in B: + entry = self.Entry(**dict(b[key], name=key, app=self.app)) + if schedule.get(key): + schedule[key].update(entry) + else: + schedule[key] = entry + + def _ensure_connected(self): + # callback called for each retry while the connection + # can't be established. + def _error_handler(exc, interval): + error('beat: Connection error: %s. 
' + 'Trying again in %s seconds...', exc, interval) + + return self.connection.ensure_connection( + _error_handler, self.app.conf.BROKER_CONNECTION_MAX_RETRIES + ) + + def get_schedule(self): + return self.data + + def set_schedule(self, schedule): + self.data = schedule + schedule = property(get_schedule, set_schedule) + + @cached_property + def connection(self): + return self.app.connection() + + @cached_property + def publisher(self): + return self.Publisher(self._ensure_connected()) + + @property + def info(self): + return '' + + +class PersistentScheduler(Scheduler): + persistence = shelve + known_suffixes = ('', '.db', '.dat', '.bak', '.dir') + + _store = None + + def __init__(self, *args, **kwargs): + self.schedule_filename = kwargs.get('schedule_filename') + Scheduler.__init__(self, *args, **kwargs) + + def _remove_db(self): + for suffix in self.known_suffixes: + with platforms.ignore_errno(errno.ENOENT): + os.remove(self.schedule_filename + suffix) + + def setup_schedule(self): + try: + self._store = self.persistence.open(self.schedule_filename, + writeback=True) + except Exception as exc: + error('Removing corrupted schedule file %r: %r', + self.schedule_filename, exc, exc_info=True) + self._remove_db() + self._store = self.persistence.open(self.schedule_filename, + writeback=True) + else: + try: + self._store['entries'] + except KeyError: + # new schedule db + self._store['entries'] = {} + else: + if '__version__' not in self._store: + warning('DB Reset: Account for new __version__ field') + self._store.clear() # remove schedule at 2.2.2 upgrade. + elif 'tz' not in self._store: + warning('DB Reset: Account for new tz field') + self._store.clear() # remove schedule at 3.0.8 upgrade + elif 'utc_enabled' not in self._store: + warning('DB Reset: Account for new utc_enabled field') + self._store.clear() # remove schedule at 3.0.9 upgrade + + tz = self.app.conf.CELERY_TIMEZONE + stored_tz = self._store.get('tz') + if stored_tz is not None and stored_tz != tz: + warning('Reset: Timezone changed from %r to %r', stored_tz, tz) + self._store.clear() # Timezone changed, reset db! + utc = self.app.conf.CELERY_ENABLE_UTC + stored_utc = self._store.get('utc_enabled') + if stored_utc is not None and stored_utc != utc: + choices = {True: 'enabled', False: 'disabled'} + warning('Reset: UTC changed from %s to %s', + choices[stored_utc], choices[utc]) + self._store.clear() # UTC setting changed, reset db! + entries = self._store.setdefault('entries', {}) + self.merge_inplace(self.app.conf.CELERYBEAT_SCHEDULE) + self.install_default_entries(self.schedule) + self._store.update(__version__=__version__, tz=tz, utc_enabled=utc) + self.sync() + debug('Current schedule:\n' + '\n'.join( + repr(entry) for entry in values(entries))) + + def get_schedule(self): + return self._store['entries'] + + def set_schedule(self, schedule): + self._store['entries'] = schedule + schedule = property(get_schedule, set_schedule) + + def sync(self): + if self._store is not None: + self._store.sync() + + def close(self): + self.sync() + self._store.close() + + @property + def info(self): + return ' . 
db -> {self.schedule_filename}'.format(self=self) + + +class Service(object): + scheduler_cls = PersistentScheduler + + def __init__(self, app, max_interval=None, schedule_filename=None, + scheduler_cls=None): + self.app = app + self.max_interval = (max_interval + or app.conf.CELERYBEAT_MAX_LOOP_INTERVAL) + self.scheduler_cls = scheduler_cls or self.scheduler_cls + self.schedule_filename = ( + schedule_filename or app.conf.CELERYBEAT_SCHEDULE_FILENAME) + + self._is_shutdown = Event() + self._is_stopped = Event() + + def __reduce__(self): + return self.__class__, (self.max_interval, self.schedule_filename, + self.scheduler_cls, self.app) + + def start(self, embedded_process=False, drift=-0.010): + info('beat: Starting...') + debug('beat: Ticking with max interval->%s', + humanize_seconds(self.scheduler.max_interval)) + + signals.beat_init.send(sender=self) + if embedded_process: + signals.beat_embedded_init.send(sender=self) + platforms.set_process_title('celery beat') + + try: + while not self._is_shutdown.is_set(): + interval = self.scheduler.tick() + interval = interval + drift if interval else interval + if interval and interval > 0: + debug('beat: Waking up %s.', + humanize_seconds(interval, prefix='in ')) + time.sleep(interval) + except (KeyboardInterrupt, SystemExit): + self._is_shutdown.set() + finally: + self.sync() + + def sync(self): + self.scheduler.close() + self._is_stopped.set() + + def stop(self, wait=False): + info('beat: Shutting down...') + self._is_shutdown.set() + wait and self._is_stopped.wait() # block until shutdown done. + + def get_scheduler(self, lazy=False): + filename = self.schedule_filename + scheduler = instantiate(self.scheduler_cls, + app=self.app, + schedule_filename=filename, + max_interval=self.max_interval, + lazy=lazy) + return scheduler + + @cached_property + def scheduler(self): + return self.get_scheduler() + + +class _Threaded(Thread): + """Embedded task scheduler using threading.""" + + def __init__(self, *args, **kwargs): + super(_Threaded, self).__init__() + self.service = Service(*args, **kwargs) + self.daemon = True + self.name = 'Beat' + + def run(self): + self.service.start() + + def stop(self): + self.service.stop(wait=True) + + +try: + ensure_multiprocessing() +except NotImplementedError: # pragma: no cover + _Process = None +else: + class _Process(Process): # noqa + + def __init__(self, *args, **kwargs): + super(_Process, self).__init__() + self.service = Service(*args, **kwargs) + self.name = 'Beat' + + def run(self): + reset_signals(full=False) + platforms.close_open_fds([ + sys.__stdin__, sys.__stdout__, sys.__stderr__, + ] + list(iter_open_logger_fds())) + self.service.start(embedded_process=True) + + def stop(self): + self.service.stop() + self.terminate() + + +def EmbeddedService(*args, **kwargs): + """Return embedded clock service. + + :keyword thread: Run threaded instead of as a separate process. + Uses :mod:`multiprocessing` by default, if available. + + """ + if kwargs.pop('thread', False) or _Process is None: + # Need short max interval to be able to stop thread + # in reasonable time. 
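+        # (the process variant is stopped with terminate() instead, so it
+        # does not need this cap.)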
+ kwargs.setdefault('max_interval', 1) + return _Threaded(*args, **kwargs) + return _Process(*args, **kwargs) diff --git a/celery/bin/__init__.py b/celery/bin/__init__.py new file mode 100644 index 0000000..3f44b50 --- /dev/null +++ b/celery/bin/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import + +from .base import Option + +__all__ = ['Option'] diff --git a/celery/bin/amqp.py b/celery/bin/amqp.py new file mode 100644 index 0000000..4dab152 --- /dev/null +++ b/celery/bin/amqp.py @@ -0,0 +1,369 @@ +# -*- coding: utf-8 -*- +""" +The :program:`celery amqp` command. + +.. program:: celery amqp + +""" +from __future__ import absolute_import, print_function, unicode_literals + +import cmd +import sys +import shlex +import pprint + +from functools import partial +from itertools import count + +from kombu.utils.encoding import safe_str + +from celery.utils.functional import padlist + +from celery.bin.base import Command +from celery.five import string_t +from celery.utils import strtobool + +__all__ = ['AMQPAdmin', 'AMQShell', 'Spec', 'amqp'] + +# Map to coerce strings to other types. +COERCE = {bool: strtobool} + +HELP_HEADER = """ +Commands +-------- +""".rstrip() + +EXAMPLE_TEXT = """ +Example: + -> queue.delete myqueue yes no +""" + +say = partial(print, file=sys.stderr) + + +class Spec(object): + """AMQP Command specification. + + Used to convert arguments to Python values and display various help + and tooltips. + + :param args: see :attr:`args`. + :keyword returns: see :attr:`returns`. + + .. attribute args:: + + List of arguments this command takes. Should + contain `(argument_name, argument_type)` tuples. + + .. attribute returns: + + Helpful human string representation of what this command returns. + May be :const:`None`, to signify the return type is unknown. + + """ + def __init__(self, *args, **kwargs): + self.args = args + self.returns = kwargs.get('returns') + + def coerce(self, index, value): + """Coerce value for argument at index.""" + arg_info = self.args[index] + arg_type = arg_info[1] + # Might be a custom way to coerce the string value, + # so look in the coercion map. + return COERCE.get(arg_type, arg_type)(value) + + def str_args_to_python(self, arglist): + """Process list of string arguments to values according to spec. + + e.g: + + >>> spec = Spec([('queue', str), ('if_unused', bool)]) + >>> spec.str_args_to_python('pobox', 'true') + ('pobox', True) + + """ + return tuple( + self.coerce(index, value) for index, value in enumerate(arglist)) + + def format_response(self, response): + """Format the return value of this command in a human-friendly way.""" + if not self.returns: + return 'ok.' if response is None else response + if callable(self.returns): + return self.returns(response) + return self.returns.format(response) + + def format_arg(self, name, type, default_value=None): + if default_value is not None: + return '{0}:{1}'.format(name, default_value) + return name + + def format_signature(self): + return ' '.join(self.format_arg(*padlist(list(arg), 3)) + for arg in self.args) + + +def dump_message(message): + if message is None: + return 'No messages in queue. basic.publish something.' + return {'body': message.body, + 'properties': message.properties, + 'delivery_info': message.delivery_info} + + +def format_declare_queue(ret): + return 'ok. queue:{0} messages:{1} consumers:{2}.'.format(*ret) + + +class AMQShell(cmd.Cmd): + """AMQP API Shell. + + :keyword connect: Function used to connect to the server, must return + connection object. 
+ + :keyword silent: If :const:`True`, the commands won't have annoying + output not relevant when running in non-shell mode. + + + .. attribute: builtins + + Mapping of built-in command names -> method names + + .. attribute:: amqp + + Mapping of AMQP API commands and their :class:`Spec`. + + """ + conn = None + chan = None + prompt_fmt = '{self.counter}> ' + identchars = cmd.IDENTCHARS = '.' + needs_reconnect = False + counter = 1 + inc_counter = count(2) + + builtins = {'EOF': 'do_exit', + 'exit': 'do_exit', + 'help': 'do_help'} + + amqp = { + 'exchange.declare': Spec(('exchange', str), + ('type', str), + ('passive', bool, 'no'), + ('durable', bool, 'no'), + ('auto_delete', bool, 'no'), + ('internal', bool, 'no')), + 'exchange.delete': Spec(('exchange', str), + ('if_unused', bool)), + 'queue.bind': Spec(('queue', str), + ('exchange', str), + ('routing_key', str)), + 'queue.declare': Spec(('queue', str), + ('passive', bool, 'no'), + ('durable', bool, 'no'), + ('exclusive', bool, 'no'), + ('auto_delete', bool, 'no'), + returns=format_declare_queue), + 'queue.delete': Spec(('queue', str), + ('if_unused', bool, 'no'), + ('if_empty', bool, 'no'), + returns='ok. {0} messages deleted.'), + 'queue.purge': Spec(('queue', str), + returns='ok. {0} messages deleted.'), + 'basic.get': Spec(('queue', str), + ('no_ack', bool, 'off'), + returns=dump_message), + 'basic.publish': Spec(('msg', str), + ('exchange', str), + ('routing_key', str), + ('mandatory', bool, 'no'), + ('immediate', bool, 'no')), + 'basic.ack': Spec(('delivery_tag', int)), + } + + def __init__(self, *args, **kwargs): + self.connect = kwargs.pop('connect') + self.silent = kwargs.pop('silent', False) + self.out = kwargs.pop('out', sys.stderr) + cmd.Cmd.__init__(self, *args, **kwargs) + self._reconnect() + + def note(self, m): + """Say something to the user. Disabled if :attr:`silent`.""" + if not self.silent: + say(m, file=self.out) + + def say(self, m): + say(m, file=self.out) + + def get_amqp_api_command(self, cmd, arglist): + """With a command name and a list of arguments, convert the arguments + to Python values and find the corresponding method on the AMQP channel + object. + + :returns: tuple of `(method, processed_args)`. + + """ + spec = self.amqp[cmd] + args = spec.str_args_to_python(arglist) + attr_name = cmd.replace('.', '_') + if self.needs_reconnect: + self._reconnect() + return getattr(self.chan, attr_name), args, spec.format_response + + def do_exit(self, *args): + """The `'exit'` command.""" + self.note("\n-> please, don't leave!") + sys.exit(0) + + def display_command_help(self, cmd, short=False): + spec = self.amqp[cmd] + self.say('{0} {1}'.format(cmd, spec.format_signature())) + + def do_help(self, *args): + if not args: + self.say(HELP_HEADER) + for cmd_name in self.amqp: + self.display_command_help(cmd_name, short=True) + self.say(EXAMPLE_TEXT) + else: + self.display_command_help(args[0]) + + def default(self, line): + self.say("unknown syntax: {0!r}. how about some 'help'?".format(line)) + + def get_names(self): + return set(self.builtins) | set(self.amqp) + + def completenames(self, text, *ignored): + """Return all commands starting with `text`, for tab-completion.""" + names = self.get_names() + first = [cmd for cmd in names + if cmd.startswith(text.replace('_', '.'))] + if first: + return first + return [cmd for cmd in names + if cmd.partition('.')[2].startswith(text)] + + def dispatch(self, cmd, argline): + """Dispatch and execute the command. + + Lookup order is: :attr:`builtins` -> :attr:`amqp`. 
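+
+        For example, ``dispatch('queue.declare', 'myqueue')`` (the queue
+        name is illustrative only) is resolved by
+        :meth:`get_amqp_api_command` to the channel's ``queue_declare``
+        method.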
+ + """ + arglist = shlex.split(safe_str(argline)) + if cmd in self.builtins: + return getattr(self, self.builtins[cmd])(*arglist) + fun, args, formatter = self.get_amqp_api_command(cmd, arglist) + return formatter(fun(*args)) + + def parseline(self, line): + """Parse input line. + + :returns: tuple of three items: + `(command_name, arglist, original_line)` + + """ + parts = line.split() + if parts: + return parts[0], ' '.join(parts[1:]), line + return '', '', line + + def onecmd(self, line): + """Parse line and execute command.""" + cmd, arg, line = self.parseline(line) + if not line: + return self.emptyline() + self.lastcmd = line + self.counter = next(self.inc_counter) + try: + self.respond(self.dispatch(cmd, arg)) + except (AttributeError, KeyError) as exc: + self.default(line) + except Exception as exc: + self.say(exc) + self.needs_reconnect = True + + def respond(self, retval): + """What to do with the return value of a command.""" + if retval is not None: + if isinstance(retval, string_t): + self.say(retval) + else: + self.say(pprint.pformat(retval)) + + def _reconnect(self): + """Re-establish connection to the AMQP server.""" + self.conn = self.connect(self.conn) + self.chan = self.conn.default_channel + self.needs_reconnect = False + + @property + def prompt(self): + return self.prompt_fmt.format(self=self) + + +class AMQPAdmin(object): + """The celery :program:`celery amqp` utility.""" + Shell = AMQShell + + def __init__(self, *args, **kwargs): + self.app = kwargs['app'] + self.out = kwargs.setdefault('out', sys.stderr) + self.silent = kwargs.get('silent') + self.args = args + + def connect(self, conn=None): + if conn: + conn.close() + conn = self.app.connection() + self.note('-> connecting to {0}.'.format(conn.as_uri())) + conn.connect() + self.note('-> connected.') + return conn + + def run(self): + shell = self.Shell(connect=self.connect, out=self.out) + if self.args: + return shell.onecmd(' '.join(self.args)) + try: + return shell.cmdloop() + except KeyboardInterrupt: + self.note('(bibi)') + pass + + def note(self, m): + if not self.silent: + say(m, file=self.out) + + +class amqp(Command): + """AMQP Administration Shell. + + Also works for non-amqp transports (but not ones that + store declarations in memory). + + Examples:: + + celery amqp + start shell mode + celery amqp help + show list of commands + + celery amqp exchange.delete name + celery amqp queue.delete queue + celery amqp queue.delete queue yes yes + + """ + + def run(self, *args, **options): + options['app'] = self.app + return AMQPAdmin(*args, **options).run() + + +def main(): + amqp().execute_from_commandline() + +if __name__ == '__main__': # pragma: no cover + main() diff --git a/celery/bin/base.py b/celery/bin/base.py new file mode 100644 index 0000000..8e3ff27 --- /dev/null +++ b/celery/bin/base.py @@ -0,0 +1,653 @@ +# -*- coding: utf-8 -*- +""" + +.. _preload-options: + +Preload Options +--------------- + +These options are supported by all commands, +and usually parsed before command-specific arguments. + +.. cmdoption:: -A, --app + + app instance to use (e.g. module.attr_name) + +.. cmdoption:: -b, --broker + + url to broker. default is 'amqp://guest@localhost//' + +.. cmdoption:: --loader + + name of custom loader class to use. + +.. cmdoption:: --config + + Name of the configuration module + +.. _daemon-options: + +Daemon Options +-------------- + +These options are supported by commands that can detach +into the background (daemon). 
They will be present +in any command that also has a `--detach` option. + +.. cmdoption:: -f, --logfile + + Path to log file. If no logfile is specified, `stderr` is used. + +.. cmdoption:: --pidfile + + Optional file used to store the process pid. + + The program will not start if this file already exists + and the pid is still alive. + +.. cmdoption:: --uid + + User id, or user name of the user to run as after detaching. + +.. cmdoption:: --gid + + Group id, or group name of the main group to change to after + detaching. + +.. cmdoption:: --umask + + Effective umask (in octal) of the process after detaching. Inherits + the umask of the parent process by default. + +.. cmdoption:: --workdir + + Optional directory to change to after detaching. + +""" +from __future__ import absolute_import, print_function, unicode_literals + +import os +import random +import re +import sys +import warnings +import json + +from collections import defaultdict +from heapq import heappush +from inspect import getargspec +from optparse import OptionParser, IndentedHelpFormatter, make_option as Option +from pprint import pformat + +from celery import VERSION_BANNER, Celery, maybe_patch_concurrency +from celery import signals +from celery.exceptions import CDeprecationWarning, CPendingDeprecationWarning +from celery.five import items, string, string_t +from celery.platforms import EX_FAILURE, EX_OK, EX_USAGE +from celery.utils import term +from celery.utils import text +from celery.utils import node_format, host_format +from celery.utils.imports import symbol_by_name, import_from_cwd + +try: + input = raw_input +except NameError: + pass + +# always enable DeprecationWarnings, so our users can see them. +for warning in (CDeprecationWarning, CPendingDeprecationWarning): + warnings.simplefilter('once', warning, 0) + +ARGV_DISABLED = """ +Unrecognized command-line arguments: {0} + +Try --help? +""" + +find_long_opt = re.compile(r'.+?(--.+?)(?:\s|,|$)') +find_rst_ref = re.compile(r':\w+:`(.+?)`') + +__all__ = ['Error', 'UsageError', 'Extensions', 'HelpFormatter', + 'Command', 'Option', 'daemon_options'] + + +class Error(Exception): + status = EX_FAILURE + + def __init__(self, reason, status=None): + self.reason = reason + self.status = status if status is not None else self.status + super(Error, self).__init__(reason, status) + + def __str__(self): + return self.reason + __unicode__ = __str__ + + +class UsageError(Error): + status = EX_USAGE + + +class Extensions(object): + + def __init__(self, namespace, register): + self.names = [] + self.namespace = namespace + self.register = register + + def add(self, cls, name): + heappush(self.names, name) + self.register(cls, name=name) + + def load(self): + try: + from pkg_resources import iter_entry_points + except ImportError: # pragma: no cover + return + + for ep in iter_entry_points(self.namespace): + sym = ':'.join([ep.module_name, ep.attrs[0]]) + try: + cls = symbol_by_name(sym) + except (ImportError, SyntaxError) as exc: + warnings.warn( + 'Cannot load extension {0!r}: {1!r}'.format(sym, exc)) + else: + self.add(cls, ep.name) + return self.names + + +class HelpFormatter(IndentedHelpFormatter): + + def format_epilog(self, epilog): + if epilog: + return '\n{0}\n\n'.format(epilog) + return '' + + def format_description(self, description): + return text.ensure_2lines(text.fill_paragraphs( + text.dedent(description), self.width)) + + +class Command(object): + """Base class for command-line applications. + + :keyword app: The current app. 
+ :keyword get_app: Callable returning the current app if no app provided. + + """ + Error = Error + UsageError = UsageError + Parser = OptionParser + + #: Arg list used in help. + args = '' + + #: Application version. + version = VERSION_BANNER + + #: If false the parser will raise an exception if positional + #: args are provided. + supports_args = True + + #: List of options (without preload options). + option_list = () + + # module Rst documentation to parse help from (if any) + doc = None + + # Some programs (multi) does not want to load the app specified + # (Issue #1008). + respects_app_option = True + + #: List of options to parse before parsing other options. + preload_options = ( + Option('-A', '--app', default=None), + Option('-b', '--broker', default=None), + Option('--loader', default=None), + Option('--config', default=None), + Option('--workdir', default=None, dest='working_directory'), + Option('--no-color', '-C', action='store_true', default=None), + Option('--quiet', '-q', action='store_true'), + ) + + #: Enable if the application should support config from the cmdline. + enable_config_from_cmdline = False + + #: Default configuration namespace. + namespace = 'celery' + + #: Text to print at end of --help + epilog = None + + #: Text to print in --help before option list. + description = '' + + #: Set to true if this command doesn't have subcommands + leaf = True + + # used by :meth:`say_remote_command_reply`. + show_body = True + # used by :meth:`say_chat`. + show_reply = True + + prog_name = 'celery' + + def __init__(self, app=None, get_app=None, no_color=False, + stdout=None, stderr=None, quiet=False, on_error=None, + on_usage_error=None): + self.app = app + self.get_app = get_app or self._get_default_app + self.stdout = stdout or sys.stdout + self.stderr = stderr or sys.stderr + self._colored = None + self._no_color = no_color + self.quiet = quiet + if not self.description: + self.description = self.__doc__ + if on_error: + self.on_error = on_error + if on_usage_error: + self.on_usage_error = on_usage_error + + def run(self, *args, **options): + """This is the body of the command called by :meth:`handle_argv`.""" + raise NotImplementedError('subclass responsibility') + + def on_error(self, exc): + self.error(self.colored.red('Error: {0}'.format(exc))) + + def on_usage_error(self, exc): + self.handle_error(exc) + + def on_concurrency_setup(self): + pass + + def __call__(self, *args, **kwargs): + random.seed() # maybe we were forked. + self.verify_args(args) + try: + ret = self.run(*args, **kwargs) + return ret if ret is not None else EX_OK + except self.UsageError as exc: + self.on_usage_error(exc) + return exc.status + except self.Error as exc: + self.on_error(exc) + return exc.status + + def verify_args(self, given, _index=0): + S = getargspec(self.run) + _index = 1 if S.args and S.args[0] == 'self' else _index + required = S.args[_index:-len(S.defaults) if S.defaults else None] + missing = required[len(given):] + if missing: + raise self.UsageError('Missing required {0}: {1}'.format( + text.pluralize(len(missing), 'argument'), + ', '.join(missing) + )) + + def execute_from_commandline(self, argv=None): + """Execute application from command-line. + + :keyword argv: The list of command-line arguments. + Defaults to ``sys.argv``. + + """ + if argv is None: + argv = list(sys.argv) + # Should we load any special concurrency environment? + self.maybe_patch_concurrency(argv) + self.on_concurrency_setup() + + # Dump version and exit if '--version' arg set. 
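+        # (This happens before the app is set up, so --version works
+        # without any configuration being loaded.)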
+ self.early_version(argv) + argv = self.setup_app_from_commandline(argv) + self.prog_name = os.path.basename(argv[0]) + return self.handle_argv(self.prog_name, argv[1:]) + + def run_from_argv(self, prog_name, argv=None, command=None): + return self.handle_argv(prog_name, + sys.argv if argv is None else argv, command) + + def maybe_patch_concurrency(self, argv=None): + argv = argv or sys.argv + pool_option = self.with_pool_option(argv) + if pool_option: + maybe_patch_concurrency(argv, *pool_option) + short_opts, long_opts = pool_option + + def usage(self, command): + return '%prog {0} [options] {self.args}'.format(command, self=self) + + def get_options(self): + """Get supported command-line options.""" + return self.option_list + + def expanduser(self, value): + if isinstance(value, string_t): + return os.path.expanduser(value) + return value + + def ask(self, q, choices, default=None): + """Prompt user to choose from a tuple of string values. + + :param q: the question to ask (do not include questionark) + :param choice: tuple of possible choices, must be lowercase. + :param default: Default value if any. + + If a default is not specified the question will be repeated + until the user gives a valid choice. + + Matching is done case insensitively. + + """ + schoices = choices + if default is not None: + schoices = [c.upper() if c == default else c.lower() + for c in choices] + schoices = '/'.join(schoices) + + p = '{0} ({1})? '.format(q.capitalize(), schoices) + while 1: + val = input(p).lower() + if val in choices: + return val + elif default is not None: + break + return default + + def handle_argv(self, prog_name, argv, command=None): + """Parse command-line arguments from ``argv`` and dispatch + to :meth:`run`. + + :param prog_name: The program name (``argv[0]``). + :param argv: Command arguments. + + Exits with an error message if :attr:`supports_args` is disabled + and ``argv`` contains positional arguments. + + """ + options, args = self.prepare_args( + *self.parse_options(prog_name, argv, command)) + return self(*args, **options) + + def prepare_args(self, options, args): + if options: + options = dict((k, self.expanduser(v)) + for k, v in items(vars(options)) + if not k.startswith('_')) + args = [self.expanduser(arg) for arg in args] + self.check_args(args) + return options, args + + def check_args(self, args): + if not self.supports_args and args: + self.die(ARGV_DISABLED.format(', '.join(args)), EX_USAGE) + + def error(self, s): + self.out(s, fh=self.stderr) + + def out(self, s, fh=None): + print(s, file=fh or self.stdout) + + def die(self, msg, status=EX_FAILURE): + self.error(msg) + sys.exit(status) + + def early_version(self, argv): + if '--version' in argv: + print(self.version, file=self.stdout) + sys.exit(0) + + def parse_options(self, prog_name, arguments, command=None): + """Parse the available options.""" + # Don't want to load configuration to just print the version, + # so we handle --version manually here. 
+ self.parser = self.create_parser(prog_name, command) + return self.parser.parse_args(arguments) + + def create_parser(self, prog_name, command=None): + option_list = ( + self.preload_options + + self.get_options() + + tuple(self.app.user_options['preload']) + ) + return self.prepare_parser(self.Parser( + prog=prog_name, + usage=self.usage(command), + version=self.version, + epilog=self.epilog, + formatter=HelpFormatter(), + description=self.description, + option_list=option_list, + )) + + def prepare_parser(self, parser): + docs = [self.parse_doc(doc) for doc in (self.doc, __doc__) if doc] + for doc in docs: + for long_opt, help in items(doc): + option = parser.get_option(long_opt) + if option is not None: + option.help = ' '.join(help).format(default=option.default) + return parser + + def setup_app_from_commandline(self, argv): + preload_options = self.parse_preload_options(argv) + quiet = preload_options.get('quiet') + if quiet is not None: + self.quiet = quiet + try: + self.no_color = preload_options['no_color'] + except KeyError: + pass + workdir = preload_options.get('working_directory') + if workdir: + os.chdir(workdir) + app = (preload_options.get('app') or + os.environ.get('CELERY_APP') or + self.app) + preload_loader = preload_options.get('loader') + if preload_loader: + # Default app takes loader from this env (Issue #1066). + os.environ['CELERY_LOADER'] = preload_loader + loader = (preload_loader, + os.environ.get('CELERY_LOADER') or + 'default') + broker = preload_options.get('broker', None) + if broker: + os.environ['CELERY_BROKER_URL'] = broker + config = preload_options.get('config') + if config: + os.environ['CELERY_CONFIG_MODULE'] = config + if self.respects_app_option: + if app: + self.app = self.find_app(app) + elif self.app is None: + self.app = self.get_app(loader=loader) + if self.enable_config_from_cmdline: + argv = self.process_cmdline_config(argv) + else: + self.app = Celery(fixups=[]) + + user_preload = tuple(self.app.user_options['preload'] or ()) + if user_preload: + user_options = self.preparse_options(argv, user_preload) + for user_option in user_preload: + user_options.setdefault(user_option.dest, user_option.default) + signals.user_preload_options.send( + sender=self, app=self.app, options=user_options, + ) + return argv + + def find_app(self, app): + from celery.app.utils import find_app + return find_app(app, symbol_by_name=self.symbol_by_name) + + def symbol_by_name(self, name, imp=import_from_cwd): + return symbol_by_name(name, imp=imp) + get_cls_by_name = symbol_by_name # XXX compat + + def process_cmdline_config(self, argv): + try: + cargs_start = argv.index('--') + except ValueError: + return argv + argv, cargs = argv[:cargs_start], argv[cargs_start + 1:] + self.app.config_from_cmdline(cargs, namespace=self.namespace) + return argv + + def parse_preload_options(self, args): + return self.preparse_options(args, self.preload_options) + + def preparse_options(self, args, options): + acc = {} + opts = {} + for opt in options: + for t in (opt._long_opts, opt._short_opts): + opts.update(dict(zip(t, [opt] * len(t)))) + index = 0 + length = len(args) + while index < length: + arg = args[index] + if arg.startswith('--'): + if '=' in arg: + key, value = arg.split('=', 1) + opt = opts.get(key) + if opt: + acc[opt.dest] = value + else: + opt = opts.get(arg) + if opt and opt.takes_value(): + # optparse also supports ['--opt', 'value'] + # (Issue #1668) + acc[opt.dest] = args[index + 1] + index += 1 + elif opt and opt.action == 'store_true': + acc[opt.dest] = 
True + elif arg.startswith('-'): + opt = opts.get(arg) + if opt: + if opt.takes_value(): + try: + acc[opt.dest] = args[index + 1] + except IndexError: + raise ValueError( + 'Missing required argument for {0}'.format( + arg)) + index += 1 + elif opt.action == 'store_true': + acc[opt.dest] = True + index += 1 + return acc + + def parse_doc(self, doc): + options, in_option = defaultdict(list), None + for line in doc.splitlines(): + if line.startswith('.. cmdoption::'): + m = find_long_opt.match(line) + if m: + in_option = m.groups()[0].strip() + assert in_option, 'missing long opt' + elif in_option and line.startswith(' ' * 4): + options[in_option].append( + find_rst_ref.sub(r'\1', line.strip()).replace('`', '')) + return options + + def with_pool_option(self, argv): + """Return tuple of ``(short_opts, long_opts)`` if the command + supports a pool argument, and used to monkey patch eventlet/gevent + environments as early as possible. + + E.g:: + has_pool_option = (['-P'], ['--pool']) + """ + pass + + def node_format(self, s, nodename, **extra): + return node_format(s, nodename, **extra) + + def host_format(self, s, **extra): + return host_format(s, **extra) + + def _get_default_app(self, *args, **kwargs): + from celery._state import get_current_app + return get_current_app() # omit proxy + + def pretty_list(self, n): + c = self.colored + if not n: + return '- empty -' + return '\n'.join( + str(c.reset(c.white('*'), ' {0}'.format(item))) for item in n + ) + + def pretty_dict_ok_error(self, n): + c = self.colored + try: + return (c.green('OK'), + text.indent(self.pretty(n['ok'])[1], 4)) + except KeyError: + pass + return (c.red('ERROR'), + text.indent(self.pretty(n['error'])[1], 4)) + + def say_remote_command_reply(self, replies): + c = self.colored + node = next(iter(replies)) # <-- take first. + reply = replies[node] + status, preply = self.pretty(reply) + self.say_chat('->', c.cyan(node, ': ') + status, + text.indent(preply, 4) if self.show_reply else '') + + def pretty(self, n): + OK = str(self.colored.green('OK')) + if isinstance(n, list): + return OK, self.pretty_list(n) + if isinstance(n, dict): + if 'ok' in n or 'error' in n: + return self.pretty_dict_ok_error(n) + else: + return OK, json.dumps(n, sort_keys=True, indent=4) + if isinstance(n, string_t): + return OK, string(n) + return OK, pformat(n) + + def say_chat(self, direction, title, body=''): + c = self.colored + if direction == '<-' and self.quiet: + return + dirstr = not self.quiet and c.bold(c.white(direction), ' ') or '' + self.out(c.reset(dirstr, title)) + if body and self.show_body: + self.out(body) + + @property + def colored(self): + if self._colored is None: + self._colored = term.colored(enabled=not self.no_color) + return self._colored + + @colored.setter + def colored(self, obj): + self._colored = obj + + @property + def no_color(self): + return self._no_color + + @no_color.setter + def no_color(self, value): + self._no_color = value + if self._colored is not None: + self._colored.enabled = not self._no_color + + +def daemon_options(default_pidfile=None, default_logfile=None): + return ( + Option('-f', '--logfile', default=default_logfile), + Option('--pidfile', default=default_pidfile), + Option('--uid', default=None), + Option('--gid', default=None), + Option('--umask', default=None), + ) diff --git a/celery/bin/beat.py b/celery/bin/beat.py new file mode 100644 index 0000000..6b5b734 --- /dev/null +++ b/celery/bin/beat.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +""" + +The :program:`celery beat` command. + +.. 
program:: celery beat + +.. seealso:: + + See :ref:`preload-options` and :ref:`daemon-options`. + +.. cmdoption:: --detach + + Detach and run in the background as a daemon. + +.. cmdoption:: -s, --schedule + + Path to the schedule database. Defaults to `celerybeat-schedule`. + The extension '.db' may be appended to the filename. + Default is {default}. + +.. cmdoption:: -S, --scheduler + + Scheduler class to use. + Default is :class:`celery.beat.PersistentScheduler`. + +.. cmdoption:: --max-interval + + Max seconds to sleep between schedule iterations. + +.. cmdoption:: -f, --logfile + + Path to log file. If no logfile is specified, `stderr` is used. + +.. cmdoption:: -l, --loglevel + + Logging level, choose between `DEBUG`, `INFO`, `WARNING`, + `ERROR`, `CRITICAL`, or `FATAL`. + +""" +from __future__ import absolute_import + +from functools import partial + +from celery.platforms import detached, maybe_drop_privileges + +from celery.bin.base import Command, Option, daemon_options + +__all__ = ['beat'] + + +class beat(Command): + """Start the beat periodic task scheduler. + + Examples:: + + celery beat -l info + celery beat -s /var/run/celery/beat-schedule --detach + celery beat -S djcelery.schedulers.DatabaseScheduler + + """ + doc = __doc__ + enable_config_from_cmdline = True + supports_args = False + + def run(self, detach=False, logfile=None, pidfile=None, uid=None, + gid=None, umask=None, working_directory=None, **kwargs): + if not detach: + maybe_drop_privileges(uid=uid, gid=gid) + workdir = working_directory + kwargs.pop('app', None) + beat = partial(self.app.Beat, + logfile=logfile, pidfile=pidfile, **kwargs) + + if detach: + with detached(logfile, pidfile, uid, gid, umask, workdir): + return beat().run() + else: + return beat().run() + + def get_options(self): + c = self.app.conf + + return ( + (Option('--detach', action='store_true'), + Option('-s', '--schedule', + default=c.CELERYBEAT_SCHEDULE_FILENAME), + Option('--max-interval', type='float'), + Option('-S', '--scheduler', dest='scheduler_cls'), + Option('-l', '--loglevel', default=c.CELERYBEAT_LOG_LEVEL)) + + daemon_options(default_pidfile='celerybeat.pid') + + tuple(self.app.user_options['beat']) + ) + + +def main(app=None): + beat(app=app).execute_from_commandline() + +if __name__ == '__main__': # pragma: no cover + main() diff --git a/celery/bin/celery.py b/celery/bin/celery.py new file mode 100644 index 0000000..10d7c03 --- /dev/null +++ b/celery/bin/celery.py @@ -0,0 +1,826 @@ +# -*- coding: utf-8 -*- +""" + +The :program:`celery` umbrella command. + +.. program:: celery + +""" +from __future__ import absolute_import, unicode_literals + +import anyjson +import numbers +import os +import sys + +from functools import partial +from importlib import import_module + +from celery.five import string_t, values +from celery.platforms import EX_OK, EX_FAILURE, EX_UNAVAILABLE, EX_USAGE +from celery.utils import term +from celery.utils import text +from celery.utils.timeutils import maybe_iso8601 + +# Cannot use relative imports here due to a Windows issue (#1111). 
+from celery.bin.base import Command, Option, Extensions + +# Import commands from other modules +from celery.bin.amqp import amqp +from celery.bin.beat import beat +from celery.bin.events import events +from celery.bin.graph import graph +from celery.bin.worker import worker + +__all__ = ['CeleryCommand', 'main'] + +HELP = """ +---- -- - - ---- Commands- -------------- --- ------------ + +{commands} +---- -- - - --------- -- - -------------- --- ------------ + +Type '{prog_name} --help' for help using a specific command. +""" + +MIGRATE_PROGRESS_FMT = """\ +Migrating task {state.count}/{state.strtotal}: \ +{body[task]}[{body[id]}]\ +""" + +DEBUG = os.environ.get('C_DEBUG', False) + +command_classes = [ + ('Main', ['worker', 'events', 'beat', 'shell', 'multi', 'amqp'], 'green'), + ('Remote Control', ['status', 'inspect', 'control'], 'blue'), + ('Utils', ['purge', 'list', 'migrate', 'call', 'result', 'report'], None), +] +if DEBUG: # pragma: no cover + command_classes.append( + ('Debug', ['graph'], 'red'), + ) + + +def determine_exit_status(ret): + if isinstance(ret, numbers.Integral): + return ret + return EX_OK if ret else EX_FAILURE + + +def main(argv=None): + # Fix for setuptools generated scripts, so that it will + # work with multiprocessing fork emulation. + # (see multiprocessing.forking.get_preparation_data()) + try: + if __name__ != '__main__': # pragma: no cover + sys.modules['__main__'] = sys.modules[__name__] + cmd = CeleryCommand() + cmd.maybe_patch_concurrency() + from billiard import freeze_support + freeze_support() + cmd.execute_from_commandline(argv) + except KeyboardInterrupt: + pass + + +class multi(Command): + """Start multiple worker instances.""" + respects_app_option = False + + def get_options(self): + return () + + def run_from_argv(self, prog_name, argv, command=None): + from celery.bin.multi import MultiTool + multi = MultiTool(quiet=self.quiet, no_color=self.no_color) + return multi.execute_from_commandline( + [command] + argv, prog_name, + ) + + +class list_(Command): + """Get info from broker. + + Examples:: + + celery list bindings + + NOTE: For RabbitMQ the management plugin is required. + """ + args = '[bindings]' + + def list_bindings(self, management): + try: + bindings = management.get_bindings() + except NotImplementedError: + raise self.Error('Your transport cannot list bindings.') + + fmt = lambda q, e, r: self.out('{0:<28} {1:<28} {2}'.format(q, e, r)) + fmt('Queue', 'Exchange', 'Routing Key') + fmt('-' * 16, '-' * 16, '-' * 16) + for b in bindings: + fmt(b['destination'], b['source'], b['routing_key']) + + def run(self, what=None, *_, **kw): + topics = {'bindings': self.list_bindings} + available = ', '.join(topics) + if not what: + raise self.UsageError( + 'You must specify one of {0}'.format(available)) + if what not in topics: + raise self.UsageError( + 'unknown topic {0!r} (choose one of: {1})'.format( + what, available)) + with self.app.connection() as conn: + self.app.amqp.TaskConsumer(conn).declare() + topics[what](conn.manager) + + +class call(Command): + """Call a task by name. 
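+
+    Both ``--args`` and ``--kwargs`` are interpreted as JSON
+    (a list and an object, respectively).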
+ + Examples:: + + celery call tasks.add --args='[2, 2]' + celery call tasks.add --args='[2, 2]' --countdown=10 + """ + args = '' + option_list = Command.option_list + ( + Option('--args', '-a', help='positional arguments (json).'), + Option('--kwargs', '-k', help='keyword arguments (json).'), + Option('--eta', help='scheduled time (ISO-8601).'), + Option('--countdown', type='float', + help='eta in seconds from now (float/int).'), + Option('--expires', help='expiry time (ISO-8601/float/int).'), + Option('--serializer', default='json', help='defaults to json.'), + Option('--queue', help='custom queue name.'), + Option('--exchange', help='custom exchange name.'), + Option('--routing-key', help='custom routing key.'), + ) + + def run(self, name, *_, **kw): + # Positional args. + args = kw.get('args') or () + if isinstance(args, string_t): + args = anyjson.loads(args) + + # Keyword args. + kwargs = kw.get('kwargs') or {} + if isinstance(kwargs, string_t): + kwargs = anyjson.loads(kwargs) + + # Expires can be int/float. + expires = kw.get('expires') or None + try: + expires = float(expires) + except (TypeError, ValueError): + # or a string describing an ISO 8601 datetime. + try: + expires = maybe_iso8601(expires) + except (TypeError, ValueError): + raise + + res = self.app.send_task(name, args=args, kwargs=kwargs, + countdown=kw.get('countdown'), + serializer=kw.get('serializer'), + queue=kw.get('queue'), + exchange=kw.get('exchange'), + routing_key=kw.get('routing_key'), + eta=maybe_iso8601(kw.get('eta')), + expires=expires) + self.out(res.id) + + +class purge(Command): + """Erase all messages from all known task queues. + + WARNING: There is no undo operation for this command. + + """ + warn_prelude = ( + '{warning}: This will remove all tasks from {queues}: {names}.\n' + ' There is no undo for this operation!\n\n' + '(to skip this prompt use the -f option)\n' + ) + warn_prompt = 'Are you sure you want to delete all tasks' + fmt_purged = 'Purged {mnum} {messages} from {qnum} known task {queues}.' + fmt_empty = 'No messages purged from {qnum} {queues}' + option_list = Command.option_list + ( + Option('--force', '-f', action='store_true', + help='Do not prompt for verification'), + ) + + def run(self, force=False, **kwargs): + names = list(sorted(self.app.amqp.queues.keys())) + qnum = len(names) + if not force: + self.out(self.warn_prelude.format( + warning=self.colored.red('WARNING'), + queues=text.pluralize(qnum, 'queue'), names=', '.join(names), + )) + if self.ask(self.warn_prompt, ('yes', 'no'), 'no') != 'yes': + return + messages = self.app.control.purge() + fmt = self.fmt_purged if messages else self.fmt_empty + self.out(fmt.format( + mnum=messages, qnum=qnum, + messages=text.pluralize(messages, 'message'), + queues=text.pluralize(qnum, 'queue'))) + + +class result(Command): + """Gives the return value for a given task id. 
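+
+    The result is looked up in the app's result backend, or in the
+    backend of the task given with ``--task`` if that task uses a
+    custom backend.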
+ + Examples:: + + celery result 8f511516-e2f5-4da4-9d2f-0fb83a86e500 + celery result 8f511516-e2f5-4da4-9d2f-0fb83a86e500 -t tasks.add + celery result 8f511516-e2f5-4da4-9d2f-0fb83a86e500 --traceback + + """ + args = '' + option_list = Command.option_list + ( + Option('--task', '-t', help='name of task (if custom backend)'), + Option('--traceback', action='store_true', + help='show traceback instead'), + ) + + def run(self, task_id, *args, **kwargs): + result_cls = self.app.AsyncResult + task = kwargs.get('task') + traceback = kwargs.get('traceback', False) + + if task: + result_cls = self.app.tasks[task].AsyncResult + result = result_cls(task_id) + if traceback: + value = result.traceback + else: + value = result.get() + self.out(self.pretty(value)[1]) + + +class _RemoteControl(Command): + name = None + choices = None + leaf = False + option_list = Command.option_list + ( + Option('--timeout', '-t', type='float', + help='Timeout in seconds (float) waiting for reply'), + Option('--destination', '-d', + help='Comma separated list of destination node names.')) + + def __init__(self, *args, **kwargs): + self.show_body = kwargs.pop('show_body', True) + self.show_reply = kwargs.pop('show_reply', True) + super(_RemoteControl, self).__init__(*args, **kwargs) + + @classmethod + def get_command_info(self, command, + indent=0, prefix='', color=None, help=False): + if help: + help = '|' + text.indent(self.choices[command][1], indent + 4) + else: + help = None + try: + # see if it uses args. + meth = getattr(self, command) + return text.join([ + '|' + text.indent('{0}{1} {2}'.format( + prefix, color(command), meth.__doc__), indent), + help, + ]) + + except AttributeError: + return text.join([ + '|' + text.indent(prefix + str(color(command)), indent), help, + ]) + + @classmethod + def list_commands(self, indent=0, prefix='', color=None, help=False): + color = color if color else lambda x: x + prefix = prefix + ' ' if prefix else '' + return '\n'.join(self.get_command_info(c, indent, prefix, color, help) + for c in sorted(self.choices)) + + @property + def epilog(self): + return '\n'.join([ + '[Commands]', + self.list_commands(indent=4, help=True) + ]) + + def usage(self, command): + return '%prog {0} [options] {1} [arg1 .. argN]'.format( + command, self.args) + + def call(self, *args, **kwargs): + raise NotImplementedError('call') + + def run(self, *args, **kwargs): + if not args: + raise self.UsageError( + 'Missing {0.name} method. See --help'.format(self)) + return self.do_call_method(args, **kwargs) + + def do_call_method(self, args, **kwargs): + method = args[0] + if method == 'help': + raise self.Error("Did you mean '{0.name} --help'?".format(self)) + if method not in self.choices: + raise self.UsageError( + 'Unknown {0.name} method {1}'.format(self, method)) + + if self.app.connection().transport.driver_type == 'sql': + raise self.Error('Broadcast not supported by SQL broker transport') + + destination = kwargs.get('destination') + timeout = kwargs.get('timeout') or self.choices[method][0] + if destination and isinstance(destination, string_t): + destination = [dest.strip() for dest in destination.split(',')] + + handler = getattr(self, method, self.call) + + replies = handler(method, *args[1:], timeout=timeout, + destination=destination, + callback=self.say_remote_command_reply) + if not replies: + raise self.Error('No nodes replied within time constraint.', + status=EX_UNAVAILABLE) + return replies + + +class inspect(_RemoteControl): + """Inspect the worker at runtime. 
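+
+    This sends a broadcast request to the workers and collects the
+    replies that arrive within ``--timeout`` seconds.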
+ + Availability: RabbitMQ (amqp), Redis, and MongoDB transports. + + Examples:: + + celery inspect active --timeout=5 + celery inspect scheduled -d worker1@example.com + celery inspect revoked -d w1@e.com,w2@e.com + + """ + name = 'inspect' + choices = { + 'active': (1.0, 'dump active tasks (being processed)'), + 'active_queues': (1.0, 'dump queues being consumed from'), + 'scheduled': (1.0, 'dump scheduled tasks (eta/countdown/retry)'), + 'reserved': (1.0, 'dump reserved tasks (waiting to be processed)'), + 'stats': (1.0, 'dump worker statistics'), + 'revoked': (1.0, 'dump of revoked task ids'), + 'registered': (1.0, 'dump of registered tasks'), + 'ping': (0.2, 'ping worker(s)'), + 'clock': (1.0, 'get value of logical clock'), + 'conf': (1.0, 'dump worker configuration'), + 'report': (1.0, 'get bugreport info'), + 'memsample': (1.0, 'sample memory (requires psutil)'), + 'memdump': (1.0, 'dump memory samples (requires psutil)'), + 'objgraph': (60.0, 'create object graph (requires objgraph)'), + } + + def call(self, method, *args, **options): + i = self.app.control.inspect(**options) + return getattr(i, method)(*args) + + def objgraph(self, type_='Request', *args, **kwargs): + return self.call('objgraph', type_, **kwargs) + + def conf(self, with_defaults=False, *args, **kwargs): + return self.call('conf', with_defaults, **kwargs) + + +class control(_RemoteControl): + """Workers remote control. + + Availability: RabbitMQ (amqp), Redis, and MongoDB transports. + + Examples:: + + celery control enable_events --timeout=5 + celery control -d worker1@example.com enable_events + celery control -d w1.e.com,w2.e.com enable_events + + celery control -d w1.e.com add_consumer queue_name + celery control -d w1.e.com cancel_consumer queue_name + + celery control -d w1.e.com add_consumer queue exchange direct rkey + + """ + name = 'control' + choices = { + 'enable_events': (1.0, 'tell worker(s) to enable events'), + 'disable_events': (1.0, 'tell worker(s) to disable events'), + 'add_consumer': (1.0, 'tell worker(s) to start consuming a queue'), + 'cancel_consumer': (1.0, 'tell worker(s) to stop consuming a queue'), + 'rate_limit': ( + 1.0, 'tell worker(s) to modify the rate limit for a task type'), + 'time_limit': ( + 1.0, 'tell worker(s) to modify the time limit for a task type.'), + 'autoscale': (1.0, 'change autoscale settings'), + 'pool_grow': (1.0, 'start more pool processes'), + 'pool_shrink': (1.0, 'use less pool processes'), + } + + def call(self, method, *args, **options): + return getattr(self.app.control, method)(*args, reply=True, **options) + + def pool_grow(self, method, n=1, **kwargs): + """[N=1]""" + return self.call(method, int(n), **kwargs) + + def pool_shrink(self, method, n=1, **kwargs): + """[N=1]""" + return self.call(method, int(n), **kwargs) + + def autoscale(self, method, max=None, min=None, **kwargs): + """[max] [min]""" + return self.call(method, int(max), int(min), **kwargs) + + def rate_limit(self, method, task_name, rate_limit, **kwargs): + """ (e.g. 
5/s | 5/m | 5/h)>""" + return self.call(method, task_name, rate_limit, **kwargs) + + def time_limit(self, method, task_name, soft, hard=None, **kwargs): + """ [hard_secs]""" + return self.call(method, task_name, + float(soft), float(hard), **kwargs) + + def add_consumer(self, method, queue, exchange=None, + exchange_type='direct', routing_key=None, **kwargs): + """ [exchange [type [routing_key]]]""" + return self.call(method, queue, exchange, + exchange_type, routing_key, **kwargs) + + def cancel_consumer(self, method, queue, **kwargs): + """""" + return self.call(method, queue, **kwargs) + + +class status(Command): + """Show list of workers that are online.""" + option_list = inspect.option_list + + def run(self, *args, **kwargs): + I = inspect( + app=self.app, + no_color=kwargs.get('no_color', False), + stdout=self.stdout, stderr=self.stderr, + show_reply=False, show_body=False, quiet=True, + ) + replies = I.run('ping', **kwargs) + if not replies: + raise self.Error('No nodes replied within time constraint', + status=EX_UNAVAILABLE) + nodecount = len(replies) + if not kwargs.get('quiet', False): + self.out('\n{0} {1} online.'.format( + nodecount, text.pluralize(nodecount, 'node'))) + + +class migrate(Command): + """Migrate tasks from one broker to another. + + Examples:: + + celery migrate redis://localhost amqp://guest@localhost// + celery migrate django:// redis://localhost + + NOTE: This command is experimental, make sure you have + a backup of the tasks before you continue. + """ + args = ' ' + option_list = Command.option_list + ( + Option('--limit', '-n', type='int', + help='Number of tasks to consume (int)'), + Option('--timeout', '-t', type='float', default=1.0, + help='Timeout in seconds (float) waiting for tasks'), + Option('--ack-messages', '-a', action='store_true', + help='Ack messages from source broker.'), + Option('--tasks', '-T', + help='List of task names to filter on.'), + Option('--queues', '-Q', + help='List of queues to migrate.'), + Option('--forever', '-F', action='store_true', + help='Continually migrate tasks until killed.'), + ) + progress_fmt = MIGRATE_PROGRESS_FMT + + def on_migrate_task(self, state, body, message): + self.out(self.progress_fmt.format(state=state, body=body)) + + def run(self, source, destination, **kwargs): + from kombu import Connection + from celery.contrib.migrate import migrate_tasks + + migrate_tasks(Connection(source), + Connection(destination), + callback=self.on_migrate_task, + **kwargs) + + +class shell(Command): # pragma: no cover + """Start shell session with convenient access to celery symbols. + + The following symbols will be added to the main globals: + + - celery: the current application. + - chord, group, chain, chunks, + xmap, xstarmap subtask, Task + - all registered tasks. 
+ + """ + option_list = Command.option_list + ( + Option('--ipython', '-I', + action='store_true', dest='force_ipython', + help='force iPython.'), + Option('--bpython', '-B', + action='store_true', dest='force_bpython', + help='force bpython.'), + Option('--python', '-P', + action='store_true', dest='force_python', + help='force default Python shell.'), + Option('--without-tasks', '-T', action='store_true', + help="don't add tasks to locals."), + Option('--eventlet', action='store_true', + help='use eventlet.'), + Option('--gevent', action='store_true', help='use gevent.'), + ) + + def run(self, force_ipython=False, force_bpython=False, + force_python=False, without_tasks=False, eventlet=False, + gevent=False, **kwargs): + sys.path.insert(0, os.getcwd()) + if eventlet: + import_module('celery.concurrency.eventlet') + if gevent: + import_module('celery.concurrency.gevent') + import celery + import celery.task.base + self.app.loader.import_default_modules() + self.locals = {'app': self.app, + 'celery': self.app, + 'Task': celery.Task, + 'chord': celery.chord, + 'group': celery.group, + 'chain': celery.chain, + 'chunks': celery.chunks, + 'xmap': celery.xmap, + 'xstarmap': celery.xstarmap, + 'subtask': celery.subtask, + 'signature': celery.signature} + + if not without_tasks: + self.locals.update(dict( + (task.__name__, task) for task in values(self.app.tasks) + if not task.name.startswith('celery.')), + ) + + if force_python: + return self.invoke_fallback_shell() + elif force_bpython: + return self.invoke_bpython_shell() + elif force_ipython: + return self.invoke_ipython_shell() + return self.invoke_default_shell() + + def invoke_default_shell(self): + try: + import IPython # noqa + except ImportError: + try: + import bpython # noqa + except ImportError: + return self.invoke_fallback_shell() + else: + return self.invoke_bpython_shell() + else: + return self.invoke_ipython_shell() + + def invoke_fallback_shell(self): + import code + try: + import readline + except ImportError: + pass + else: + import rlcompleter + readline.set_completer( + rlcompleter.Completer(self.locals).complete) + readline.parse_and_bind('tab:complete') + code.interact(local=self.locals) + + def invoke_ipython_shell(self): + try: + from IPython.terminal import embed + embed.TerminalInteractiveShell(user_ns=self.locals).mainloop() + except ImportError: # ipython < 0.11 + from IPython.Shell import IPShell + IPShell(argv=[], user_ns=self.locals).mainloop() + + def invoke_bpython_shell(self): + import bpython + bpython.embed(self.locals) + + +class help(Command): + """Show help screen and exit.""" + + def usage(self, command): + return '%prog [options] {0.args}'.format(self) + + def run(self, *args, **kwargs): + self.parser.print_help() + self.out(HELP.format( + prog_name=self.prog_name, + commands=CeleryCommand.list_commands(colored=self.colored), + )) + + return EX_USAGE + + +class report(Command): + """Shows information useful to include in bugreports.""" + + def run(self, *args, **kwargs): + self.out(self.app.bugreport()) + return EX_OK + + +class CeleryCommand(Command): + namespace = 'celery' + ext_fmt = '{self.namespace}.commands' + commands = { + 'amqp': amqp, + 'beat': beat, + 'call': call, + 'control': control, + 'events': events, + 'graph': graph, + 'help': help, + 'inspect': inspect, + 'list': list_, + 'migrate': migrate, + 'multi': multi, + 'purge': purge, + 'report': report, + 'result': result, + 'shell': shell, + 'status': status, + 'worker': worker, + + } + enable_config_from_cmdline = True + prog_name = 
'celery' + + @classmethod + def register_command(cls, fun, name=None): + cls.commands[name or fun.__name__] = fun + return fun + + def execute(self, command, argv=None): + try: + cls = self.commands[command] + except KeyError: + cls, argv = self.commands['help'], ['help'] + cls = self.commands.get(command) or self.commands['help'] + try: + return cls( + app=self.app, on_error=self.on_error, + no_color=self.no_color, quiet=self.quiet, + on_usage_error=partial(self.on_usage_error, command=command), + ).run_from_argv(self.prog_name, argv[1:], command=argv[0]) + except self.UsageError as exc: + self.on_usage_error(exc) + return exc.status + except self.Error as exc: + self.on_error(exc) + return exc.status + + def on_usage_error(self, exc, command=None): + if command: + helps = '{self.prog_name} {command} --help' + else: + helps = '{self.prog_name} --help' + self.error(self.colored.magenta('Error: {0}'.format(exc))) + self.error("""Please try '{0}'""".format(helps.format( + self=self, command=command, + ))) + + def _relocate_args_from_start(self, argv, index=0): + if argv: + rest = [] + while index < len(argv): + value = argv[index] + if value.startswith('--'): + rest.append(value) + elif value.startswith('-'): + # we eat the next argument even though we don't know + # if this option takes an argument or not. + # instead we will assume what is the command name in the + # return statements below. + try: + nxt = argv[index + 1] + if nxt.startswith('-'): + # is another option + rest.append(value) + else: + # is (maybe) a value for this option + rest.extend([value, nxt]) + index += 1 + except IndexError: + rest.append(value) + break + else: + break + index += 1 + if argv[index:]: + # if there are more arguments left then divide and swap + # we assume the first argument in argv[i:] is the command + # name. + return argv[index:] + rest + # if there are no more arguments then the last arg in rest' + # must be the command. 
+ [rest.pop()] + rest + return [] + + def prepare_prog_name(self, name): + if name == '__main__.py': + return sys.modules['__main__'].__file__ + return name + + def handle_argv(self, prog_name, argv): + self.prog_name = self.prepare_prog_name(prog_name) + argv = self._relocate_args_from_start(argv) + _, argv = self.prepare_args(None, argv) + try: + command = argv[0] + except IndexError: + command, argv = 'help', ['help'] + return self.execute(command, argv) + + def execute_from_commandline(self, argv=None): + argv = sys.argv if argv is None else argv + if 'multi' in argv[1:3]: # Issue 1008 + self.respects_app_option = False + try: + sys.exit(determine_exit_status( + super(CeleryCommand, self).execute_from_commandline(argv))) + except KeyboardInterrupt: + sys.exit(EX_FAILURE) + + @classmethod + def get_command_info(self, command, indent=0, color=None, colored=None): + colored = term.colored() if colored is None else colored + colored = colored.names[color] if color else lambda x: x + obj = self.commands[command] + cmd = 'celery {0}'.format(colored(command)) + if obj.leaf: + return '|' + text.indent(cmd, indent) + return text.join([ + ' ', + '|' + text.indent('{0} --help'.format(cmd), indent), + obj.list_commands(indent, 'celery {0}'.format(command), colored), + ]) + + @classmethod + def list_commands(self, indent=0, colored=None): + colored = term.colored() if colored is None else colored + white = colored.white + ret = [] + for cls, commands, color in command_classes: + ret.extend([ + text.indent('+ {0}: '.format(white(cls)), indent), + '\n'.join( + self.get_command_info(command, indent + 4, color, colored) + for command in commands), + '' + ]) + return '\n'.join(ret).strip() + + def with_pool_option(self, argv): + if len(argv) > 1 and 'worker' in argv[0:3]: + # this command supports custom pools + # that may have to be loaded as early as possible. 
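            # Returning the short and long names of the pool option lets the
            # caller scan argv for '-P'/'--pool' before regular option
            # parsing, so such pools can be set up early.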
+ return (['-P'], ['--pool']) + + def on_concurrency_setup(self): + self.load_extension_commands() + + def load_extension_commands(self): + names = Extensions(self.ext_fmt.format(self=self), + self.register_command).load() + if names: + command_classes.append(('Extensions', names, 'magenta')) + + +def command(*args, **kwargs): + """Deprecated: Use classmethod :meth:`CeleryCommand.register_command` + instead.""" + _register = CeleryCommand.register_command + return _register(args[0]) if args else _register + + +if __name__ == '__main__': # pragma: no cover + main() diff --git a/celery/bin/celeryd_detach.py b/celery/bin/celeryd_detach.py new file mode 100644 index 0000000..172d90e --- /dev/null +++ b/celery/bin/celeryd_detach.py @@ -0,0 +1,169 @@ +# -*- coding: utf-8 -*- +""" + celery.bin.celeryd_detach + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + Program used to daemonize the worker + + Using :func:`os.execv` because forking and multiprocessing + leads to weird issues (it was a long time ago now, but it + could have something to do with the threading mutex bug) + +""" +from __future__ import absolute_import + +import celery +import os +import sys + +from optparse import OptionParser, BadOptionError + +from celery.platforms import EX_FAILURE, detached +from celery.utils.log import get_logger + +from celery.bin.base import daemon_options, Option + +__all__ = ['detached_celeryd', 'detach'] + +logger = get_logger(__name__) + +C_FAKEFORK = os.environ.get('C_FAKEFORK') + +OPTION_LIST = daemon_options(default_pidfile='celeryd.pid') + ( + Option('--workdir', default=None, dest='working_directory'), + Option('--fake', + default=False, action='store_true', dest='fake', + help="Don't fork (for debugging purposes)"), +) + + +def detach(path, argv, logfile=None, pidfile=None, uid=None, + gid=None, umask=None, working_directory=None, fake=False, app=None): + fake = 1 if C_FAKEFORK else fake + with detached(logfile, pidfile, uid, gid, umask, working_directory, fake): + try: + os.execv(path, [path] + argv) + except Exception: + if app is None: + from celery import current_app + app = current_app + app.log.setup_logging_subsystem('ERROR', logfile) + logger.critical("Can't exec %r", ' '.join([path] + argv), + exc_info=True) + return EX_FAILURE + + +class PartialOptionParser(OptionParser): + + def __init__(self, *args, **kwargs): + self.leftovers = [] + OptionParser.__init__(self, *args, **kwargs) + + def _process_long_opt(self, rargs, values): + arg = rargs.pop(0) + + if '=' in arg: + opt, next_arg = arg.split('=', 1) + rargs.insert(0, next_arg) + had_explicit_value = True + else: + opt = arg + had_explicit_value = False + + try: + opt = self._match_long_opt(opt) + option = self._long_opt.get(opt) + except BadOptionError: + option = None + + if option: + if option.takes_value(): + nargs = option.nargs + if len(rargs) < nargs: + if nargs == 1: + self.error('{0} requires an argument'.format(opt)) + else: + self.error('{0} requires {1} arguments'.format( + opt, nargs)) + elif nargs == 1: + value = rargs.pop(0) + else: + value = tuple(rargs[0:nargs]) + del rargs[0:nargs] + + elif had_explicit_value: + self.error('{0} option does not take a value'.format(opt)) + else: + value = None + option.process(opt, value, values, self) + else: + self.leftovers.append(arg) + + def _process_short_opts(self, rargs, values): + arg = rargs[0] + try: + OptionParser._process_short_opts(self, rargs, values) + except BadOptionError: + self.leftovers.append(arg) + if rargs and not rargs[0][0] == '-': + self.leftovers.append(rargs.pop(0)) + + 
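
# Illustrative sketch only (this helper is not part of the original
# commit): it shows how the PartialOptionParser above keeps the daemon
# options it recognises and collects everything else in ``leftovers``,
# which detached_celeryd later forwards to the real worker command.
def _example_partial_parse():  # pragma: no cover
    parser = PartialOptionParser(option_list=OPTION_LIST)
    opts, _ = parser.parse_args(
        ['--pidfile=/var/run/w.pid', '--logfile=/var/log/w.log', '-c', '4'])
    # opts.pidfile == '/var/run/w.pid', opts.logfile == '/var/log/w.log',
    # while the unknown worker option is preserved verbatim:
    return parser.leftovers   # expected: ['-c', '4']
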
+class detached_celeryd(object): + option_list = OPTION_LIST + usage = '%prog [options] [celeryd options]' + version = celery.VERSION_BANNER + description = ('Detaches Celery worker nodes. See `celery worker --help` ' + 'for the list of supported worker arguments.') + command = sys.executable + execv_path = sys.executable + if sys.version_info < (2, 7): # does not support pkg/__main__.py + execv_argv = ['-m', 'celery.__main__', 'worker'] + else: + execv_argv = ['-m', 'celery', 'worker'] + + def __init__(self, app=None): + self.app = app + + def Parser(self, prog_name): + return PartialOptionParser(prog=prog_name, + option_list=self.option_list, + usage=self.usage, + description=self.description, + version=self.version) + + def parse_options(self, prog_name, argv): + parser = self.Parser(prog_name) + options, values = parser.parse_args(argv) + if options.logfile: + parser.leftovers.append('--logfile={0}'.format(options.logfile)) + if options.pidfile: + parser.leftovers.append('--pidfile={0}'.format(options.pidfile)) + return options, values, parser.leftovers + + def execute_from_commandline(self, argv=None): + if argv is None: + argv = sys.argv + config = [] + seen_cargs = 0 + for arg in argv: + if seen_cargs: + config.append(arg) + else: + if arg == '--': + seen_cargs = 1 + config.append(arg) + prog_name = os.path.basename(argv[0]) + options, values, leftovers = self.parse_options(prog_name, argv[1:]) + sys.exit(detach( + app=self.app, path=self.execv_path, + argv=self.execv_argv + leftovers + config, + **vars(options) + )) + + +def main(app=None): + detached_celeryd(app).execute_from_commandline() + +if __name__ == '__main__': # pragma: no cover + main() diff --git a/celery/bin/events.py b/celery/bin/events.py new file mode 100644 index 0000000..d987505 --- /dev/null +++ b/celery/bin/events.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- +""" + +The :program:`celery events` command. + +.. program:: celery events + +.. seealso:: + + See :ref:`preload-options` and :ref:`daemon-options`. + +.. cmdoption:: -d, --dump + + Dump events to stdout. + +.. cmdoption:: -c, --camera + + Take snapshots of events using this camera. + +.. cmdoption:: --detach + + Camera: Detach and run in the background as a daemon. + +.. cmdoption:: -F, --freq, --frequency + + Camera: Shutter frequency. Default is every 1.0 seconds. + +.. cmdoption:: -r, --maxrate + + Camera: Optional shutter rate limit (e.g. 10/m). + +.. cmdoption:: -l, --loglevel + + Logging level, choose between `DEBUG`, `INFO`, `WARNING`, + `ERROR`, `CRITICAL`, or `FATAL`. Default is INFO. + +""" +from __future__ import absolute_import, unicode_literals + +import sys + +from functools import partial + +from celery.platforms import detached, set_process_title, strargv +from celery.bin.base import Command, Option, daemon_options + +__all__ = ['events'] + + +class events(Command): + """Event-stream utilities. + + Commands:: + + celery events --app=proj + start graphical monitor (requires curses) + celery events -d --app=proj + dump events to screen. + celery events -b amqp:// + celery events -c [options] + run snapshot camera. 
+ + Examples:: + + celery events + celery events -d + celery events -c mod.attr -F 1.0 --detach --maxrate=100/m -l info + """ + doc = __doc__ + supports_args = False + + def run(self, dump=False, camera=None, frequency=1.0, maxrate=None, + loglevel='INFO', logfile=None, prog_name='celery events', + pidfile=None, uid=None, gid=None, umask=None, + working_directory=None, detach=False, **kwargs): + self.prog_name = prog_name + + if dump: + return self.run_evdump() + if camera: + return self.run_evcam(camera, freq=frequency, maxrate=maxrate, + loglevel=loglevel, logfile=logfile, + pidfile=pidfile, uid=uid, gid=gid, + umask=umask, + working_directory=working_directory, + detach=detach) + return self.run_evtop() + + def run_evdump(self): + from celery.events.dumper import evdump + self.set_process_status('dump') + return evdump(app=self.app) + + def run_evtop(self): + from celery.events.cursesmon import evtop + self.set_process_status('top') + return evtop(app=self.app) + + def run_evcam(self, camera, logfile=None, pidfile=None, uid=None, + gid=None, umask=None, working_directory=None, + detach=False, **kwargs): + from celery.events.snapshot import evcam + workdir = working_directory + self.set_process_status('cam') + kwargs['app'] = self.app + cam = partial(evcam, camera, + logfile=logfile, pidfile=pidfile, **kwargs) + + if detach: + with detached(logfile, pidfile, uid, gid, umask, workdir): + return cam() + else: + return cam() + + def set_process_status(self, prog, info=''): + prog = '{0}:{1}'.format(self.prog_name, prog) + info = '{0} {1}'.format(info, strargv(sys.argv)) + return set_process_title(prog, info=info) + + def get_options(self): + return ( + (Option('-d', '--dump', action='store_true'), + Option('-c', '--camera'), + Option('--detach', action='store_true'), + Option('-F', '--frequency', '--freq', + type='float', default=1.0), + Option('-r', '--maxrate'), + Option('-l', '--loglevel', default='INFO')) + + daemon_options(default_pidfile='celeryev.pid') + + tuple(self.app.user_options['events']) + ) + + +def main(): + ev = events() + ev.execute_from_commandline() + +if __name__ == '__main__': # pragma: no cover + main() diff --git a/celery/bin/graph.py b/celery/bin/graph.py new file mode 100644 index 0000000..5d58476 --- /dev/null +++ b/celery/bin/graph.py @@ -0,0 +1,191 @@ +# -*- coding: utf-8 -*- +""" + +The :program:`celery graph` command. + +.. program:: celery graph + +""" +from __future__ import absolute_import, unicode_literals + +from operator import itemgetter + +from celery.datastructures import DependencyGraph, GraphFormatter +from celery.five import items + +from .base import Command + +__all__ = ['graph'] + + +class graph(Command): + args = """ [arguments] + ..... bootsteps [worker] [consumer] + ..... 
workers [enumerate] + """ + + def run(self, what=None, *args, **kwargs): + map = {'bootsteps': self.bootsteps, 'workers': self.workers} + if not what: + raise self.UsageError('missing type') + elif what not in map: + raise self.Error('no graph {0} in {1}'.format(what, '|'.join(map))) + return map[what](*args, **kwargs) + + def bootsteps(self, *args, **kwargs): + worker = self.app.WorkController() + include = set(arg.lower() for arg in args or ['worker', 'consumer']) + if 'worker' in include: + graph = worker.blueprint.graph + if 'consumer' in include: + worker.blueprint.connect_with(worker.consumer.blueprint) + else: + graph = worker.consumer.blueprint.graph + graph.to_dot(self.stdout) + + def workers(self, *args, **kwargs): + + def simplearg(arg): + return maybe_list(itemgetter(0, 2)(arg.partition(':'))) + + def maybe_list(l, sep=','): + return (l[0], l[1].split(sep) if sep in l[1] else l[1]) + + args = dict(simplearg(arg) for arg in args) + generic = 'generic' in args + + def generic_label(node): + return '{0} ({1}://)'.format(type(node).__name__, + node._label.split('://')[0]) + + class Node(object): + force_label = None + scheme = {} + + def __init__(self, label, pos=None): + self._label = label + self.pos = pos + + def label(self): + return self._label + + def __str__(self): + return self.label() + + class Thread(Node): + scheme = {'fillcolor': 'lightcyan4', 'fontcolor': 'yellow', + 'shape': 'oval', 'fontsize': 10, 'width': 0.3, + 'color': 'black'} + + def __init__(self, label, **kwargs): + self._label = 'thr-{0}'.format(next(tids)) + self.real_label = label + self.pos = 0 + + class Formatter(GraphFormatter): + + def label(self, obj): + return obj and obj.label() + + def node(self, obj): + scheme = dict(obj.scheme) if obj.pos else obj.scheme + if isinstance(obj, Thread): + scheme['label'] = obj.real_label + return self.draw_node( + obj, dict(self.node_scheme, **scheme), + ) + + def terminal_node(self, obj): + return self.draw_node( + obj, dict(self.term_scheme, **obj.scheme), + ) + + def edge(self, a, b, **attrs): + if isinstance(a, Thread): + attrs.update(arrowhead='none', arrowtail='tee') + return self.draw_edge(a, b, self.edge_scheme, attrs) + + def subscript(n): + S = {'0': '₀', '1': '₁', '2': '₂', '3': '₃', '4': '₄', + '5': '₅', '6': '₆', '7': '₇', '8': '₈', '9': '₉'} + return ''.join([S[i] for i in str(n)]) + + class Worker(Node): + pass + + class Backend(Node): + scheme = {'shape': 'folder', 'width': 2, + 'height': 1, 'color': 'black', + 'fillcolor': 'peachpuff3', 'color': 'peachpuff4'} + + def label(self): + return generic_label(self) if generic else self._label + + class Broker(Node): + scheme = {'shape': 'circle', 'fillcolor': 'cadetblue3', + 'color': 'cadetblue4', 'height': 1} + + def label(self): + return generic_label(self) if generic else self._label + + from itertools import count + tids = count(1) + Wmax = int(args.get('wmax', 4) or 0) + Tmax = int(args.get('tmax', 3) or 0) + + def maybe_abbr(l, name, max=Wmax): + size = len(l) + abbr = max and size > max + if 'enumerate' in args: + l = ['{0}{1}'.format(name, subscript(i + 1)) + for i, obj in enumerate(l)] + if abbr: + l = l[0:max - 1] + [l[size - 1]] + l[max - 2] = '{0}⎨…{1}⎬'.format( + name[0], subscript(size - (max - 1))) + return l + + try: + workers = args['nodes'] + threads = args.get('threads') or [] + except KeyError: + replies = self.app.control.inspect().stats() + workers, threads = [], [] + for worker, reply in items(replies): + workers.append(worker) + threads.append(reply['pool']['max-concurrency']) + + 
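        # At this point ``workers`` holds the node names and ``threads`` the
        # per-node concurrency, taken either from the nodes/threads arguments
        # or from the 'max-concurrency' field of each node's stats reply.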
wlen = len(workers) + backend = args.get('backend', self.app.conf.CELERY_RESULT_BACKEND) + threads_for = {} + workers = maybe_abbr(workers, 'Worker') + if Wmax and wlen > Wmax: + threads = threads[0:3] + [threads[-1]] + for i, threads in enumerate(threads): + threads_for[workers[i]] = maybe_abbr( + list(range(int(threads))), 'P', Tmax, + ) + + broker = Broker(args.get('broker', self.app.connection().as_uri())) + backend = Backend(backend) if backend else None + graph = DependencyGraph(formatter=Formatter()) + graph.add_arc(broker) + if backend: + graph.add_arc(backend) + curworker = [0] + for i, worker in enumerate(workers): + worker = Worker(worker, pos=i) + graph.add_arc(worker) + graph.add_edge(worker, broker) + if backend: + graph.add_edge(worker, backend) + threads = threads_for.get(worker._label) + if threads: + for thread in threads: + thread = Thread(thread) + graph.add_arc(thread) + graph.add_edge(thread, worker) + + curworker[0] += 1 + + graph.to_dot(self.stdout) diff --git a/celery/bin/multi.py b/celery/bin/multi.py new file mode 100644 index 0000000..dc114e3 --- /dev/null +++ b/celery/bin/multi.py @@ -0,0 +1,640 @@ +# -*- coding: utf-8 -*- +""" + +.. program:: celery multi + +Examples +======== + +.. code-block:: bash + + # Single worker with explicit name and events enabled. + $ celery multi start Leslie -E + + # Pidfiles and logfiles are stored in the current directory + # by default. Use --pidfile and --logfile argument to change + # this. The abbreviation %N will be expanded to the current + # node name. + $ celery multi start Leslie -E --pidfile=/var/run/celery/%N.pid + --logfile=/var/log/celery/%N.log + + + # You need to add the same arguments when you restart, + # as these are not persisted anywhere. + $ celery multi restart Leslie -E --pidfile=/var/run/celery/%N.pid + --logfile=/var/run/celery/%N.log + + # To stop the node, you need to specify the same pidfile. + $ celery multi stop Leslie --pidfile=/var/run/celery/%N.pid + + # 3 workers, with 3 processes each + $ celery multi start 3 -c 3 + celery worker -n celery1@myhost -c 3 + celery worker -n celery2@myhost -c 3 + celery worker -n celery3@myhost -c 3 + + # start 3 named workers + $ celery multi start image video data -c 3 + celery worker -n image@myhost -c 3 + celery worker -n video@myhost -c 3 + celery worker -n data@myhost -c 3 + + # specify custom hostname + $ celery multi start 2 --hostname=worker.example.com -c 3 + celery worker -n celery1@worker.example.com -c 3 + celery worker -n celery2@worker.example.com -c 3 + + # specify fully qualified nodenames + $ celery multi start foo@worker.example.com bar@worker.example.com -c 3 + + # Advanced example starting 10 workers in the background: + # * Three of the workers processes the images and video queue + # * Two of the workers processes the data queue with loglevel DEBUG + # * the rest processes the default' queue. + $ celery multi start 10 -l INFO -Q:1-3 images,video -Q:4,5 data + -Q default -L:4,5 DEBUG + + # You can show the commands necessary to start the workers with + # the 'show' command: + $ celery multi show 10 -l INFO -Q:1-3 images,video -Q:4,5 data + -Q default -L:4,5 DEBUG + + # Additional options are added to each celery worker' comamnd, + # but you can also modify the options for ranges of, or specific workers + + # 3 workers: Two with 3 processes, and one with 10 processes. 
+ $ celery multi start 3 -c 3 -c:1 10 + celery worker -n celery1@myhost -c 10 + celery worker -n celery2@myhost -c 3 + celery worker -n celery3@myhost -c 3 + + # can also specify options for named workers + $ celery multi start image video data -c 3 -c:image 10 + celery worker -n image@myhost -c 10 + celery worker -n video@myhost -c 3 + celery worker -n data@myhost -c 3 + + # ranges and lists of workers in options is also allowed: + # (-c:1-3 can also be written as -c:1,2,3) + $ celery multi start 5 -c 3 -c:1-3 10 + celery worker -n celery1@myhost -c 10 + celery worker -n celery2@myhost -c 10 + celery worker -n celery3@myhost -c 10 + celery worker -n celery4@myhost -c 3 + celery worker -n celery5@myhost -c 3 + + # lists also works with named workers + $ celery multi start foo bar baz xuzzy -c 3 -c:foo,bar,baz 10 + celery worker -n foo@myhost -c 10 + celery worker -n bar@myhost -c 10 + celery worker -n baz@myhost -c 10 + celery worker -n xuzzy@myhost -c 3 + +""" +from __future__ import absolute_import, print_function, unicode_literals + +import errno +import os +import shlex +import signal +import socket +import sys + +from collections import defaultdict, namedtuple +from subprocess import Popen +from time import sleep + +from kombu.utils import cached_property +from kombu.utils.compat import OrderedDict +from kombu.utils.encoding import from_utf8 + +from celery import VERSION_BANNER +from celery.five import items +from celery.platforms import Pidfile, IS_WINDOWS +from celery.utils import term, nodesplit +from celery.utils.text import pluralize + +__all__ = ['MultiTool'] + +SIGNAMES = set(sig for sig in dir(signal) + if sig.startswith('SIG') and '_' not in sig) +SIGMAP = dict((getattr(signal, name), name) for name in SIGNAMES) + +USAGE = """\ +usage: {prog_name} start [worker options] + {prog_name} stop [-SIG (default: -TERM)] + {prog_name} stopwait [-SIG (default: -TERM)] + {prog_name} restart [-SIG] [worker options] + {prog_name} kill + + {prog_name} show [worker options] + {prog_name} get hostname [-qv] [worker options] + {prog_name} names + {prog_name} expand template + {prog_name} help + +additional options (must appear after command name): + + * --nosplash: Don't display program info. + * --quiet: Don't show as much output. + * --verbose: Show more output. + * --no-color: Don't display colors. +""" + +multi_args_t = namedtuple( + 'multi_args_t', ('name', 'argv', 'expander', 'namespace'), +) + + +def main(): + sys.exit(MultiTool().execute_from_commandline(sys.argv)) + + +CELERY_EXE = 'celery' +if sys.version_info < (2, 7): + # pkg.__main__ first supported in Py2.7 + CELERY_EXE = 'celery.__main__' + + +def celery_exe(*args): + return ' '.join((CELERY_EXE, ) + args) + + +class MultiTool(object): + retcode = 0 # Final exit code. + + def __init__(self, env=None, fh=None, quiet=False, verbose=False, + no_color=False, nosplash=False): + self.fh = fh or sys.stderr + self.env = env + self.nosplash = nosplash + self.quiet = quiet + self.verbose = verbose + self.no_color = no_color + self.prog_name = 'celery multi' + self.commands = {'start': self.start, + 'show': self.show, + 'stop': self.stop, + 'stopwait': self.stopwait, + 'stop_verify': self.stopwait, # compat alias + 'restart': self.restart, + 'kill': self.kill, + 'names': self.names, + 'expand': self.expand, + 'get': self.get, + 'help': self.help} + + def execute_from_commandline(self, argv, cmd='celery worker'): + argv = list(argv) # don't modify callers argv. + + # Reserve the --nosplash|--quiet|-q/--verbose options. 
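        # These flags only affect multi's own output; popping them here
        # ensures they are not forwarded to the generated
        # 'celery worker' command lines.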
+ if '--nosplash' in argv: + self.nosplash = argv.pop(argv.index('--nosplash')) + if '--quiet' in argv: + self.quiet = argv.pop(argv.index('--quiet')) + if '-q' in argv: + self.quiet = argv.pop(argv.index('-q')) + if '--verbose' in argv: + self.verbose = argv.pop(argv.index('--verbose')) + if '--no-color' in argv: + self.no_color = argv.pop(argv.index('--no-color')) + + self.prog_name = os.path.basename(argv.pop(0)) + if not argv or argv[0][0] == '-': + return self.error() + + try: + self.commands[argv[0]](argv[1:], cmd) + except KeyError: + self.error('Invalid command: {0}'.format(argv[0])) + + return self.retcode + + def say(self, m, newline=True): + print(m, file=self.fh, end='\n' if newline else '') + + def names(self, argv, cmd): + p = NamespacedOptionParser(argv) + self.say('\n'.join( + n.name for n in multi_args(p, cmd)), + ) + + def get(self, argv, cmd): + wanted = argv[0] + p = NamespacedOptionParser(argv[1:]) + for node in multi_args(p, cmd): + if node.name == wanted: + self.say(' '.join(node.argv)) + return + + def show(self, argv, cmd): + p = NamespacedOptionParser(argv) + self.with_detacher_default_options(p) + self.say('\n'.join( + ' '.join([sys.executable] + n.argv) for n in multi_args(p, cmd)), + ) + + def start(self, argv, cmd): + self.splash() + p = NamespacedOptionParser(argv) + self.with_detacher_default_options(p) + retcodes = [] + self.note('> Starting nodes...') + for node in multi_args(p, cmd): + self.note('\t> {0}: '.format(node.name), newline=False) + retcode = self.waitexec(node.argv) + self.note(retcode and self.FAILED or self.OK) + retcodes.append(retcode) + self.retcode = int(any(retcodes)) + + def with_detacher_default_options(self, p): + _setdefaultopt(p.options, ['--pidfile', '-p'], '%N.pid') + _setdefaultopt(p.options, ['--logfile', '-f'], '%N.log') + p.options.setdefault( + '--cmd', + '-m {0}'.format(celery_exe('worker', '--detach')), + ) + + def signal_node(self, nodename, pid, sig): + try: + os.kill(pid, sig) + except OSError as exc: + if exc.errno != errno.ESRCH: + raise + self.note('Could not signal {0} ({1}): No such process'.format( + nodename, pid)) + return False + return True + + def node_alive(self, pid): + try: + os.kill(pid, 0) + except OSError as exc: + if exc.errno == errno.ESRCH: + return False + raise + return True + + def shutdown_nodes(self, nodes, sig=signal.SIGTERM, retry=None, + callback=None): + if not nodes: + return + P = set(nodes) + + def on_down(node): + P.discard(node) + if callback: + callback(*node) + + self.note(self.colored.blue('> Stopping nodes...')) + for node in list(P): + if node in P: + nodename, _, pid = node + self.note('\t> {0}: {1} -> {2}'.format( + nodename, SIGMAP[sig][3:], pid)) + if not self.signal_node(nodename, pid, sig): + on_down(node) + + def note_waiting(): + left = len(P) + if left: + pids = ', '.join(str(pid) for _, _, pid in P) + self.note(self.colored.blue( + '> Waiting for {0} {1} -> {2}...'.format( + left, pluralize(left, 'node'), pids)), newline=False) + + if retry: + note_waiting() + its = 0 + while P: + for node in P: + its += 1 + self.note('.', newline=False) + nodename, _, pid = node + if not self.node_alive(pid): + self.note('\n\t> {0}: {1}'.format(nodename, self.OK)) + on_down(node) + note_waiting() + break + if P and not its % len(P): + sleep(float(retry)) + self.note('') + + def getpids(self, p, cmd, callback=None): + _setdefaultopt(p.options, ['--pidfile', '-p'], '%N.pid') + + nodes = [] + for node in multi_args(p, cmd): + try: + pidfile_template = _getopt( + p.namespaces[node.namespace], 
['--pidfile', '-p'], + ) + except KeyError: + pidfile_template = _getopt(p.options, ['--pidfile', '-p']) + pid = None + pidfile = node.expander(pidfile_template) + try: + pid = Pidfile(pidfile).read_pid() + except ValueError: + pass + if pid: + nodes.append((node.name, tuple(node.argv), pid)) + else: + self.note('> {0.name}: {1}'.format(node, self.DOWN)) + if callback: + callback(node.name, node.argv, pid) + + return nodes + + def kill(self, argv, cmd): + self.splash() + p = NamespacedOptionParser(argv) + for nodename, _, pid in self.getpids(p, cmd): + self.note('Killing node {0} ({1})'.format(nodename, pid)) + self.signal_node(nodename, pid, signal.SIGKILL) + + def stop(self, argv, cmd, retry=None, callback=None): + self.splash() + p = NamespacedOptionParser(argv) + return self._stop_nodes(p, cmd, retry=retry, callback=callback) + + def _stop_nodes(self, p, cmd, retry=None, callback=None): + restargs = p.args[len(p.values):] + self.shutdown_nodes(self.getpids(p, cmd, callback=callback), + sig=findsig(restargs), + retry=retry, + callback=callback) + + def restart(self, argv, cmd): + self.splash() + p = NamespacedOptionParser(argv) + self.with_detacher_default_options(p) + retvals = [] + + def on_node_shutdown(nodename, argv, pid): + self.note(self.colored.blue( + '> Restarting node {0}: '.format(nodename)), newline=False) + retval = self.waitexec(argv) + self.note(retval and self.FAILED or self.OK) + retvals.append(retval) + + self._stop_nodes(p, cmd, retry=2, callback=on_node_shutdown) + self.retval = int(any(retvals)) + + def stopwait(self, argv, cmd): + self.splash() + p = NamespacedOptionParser(argv) + self.with_detacher_default_options(p) + return self._stop_nodes(p, cmd, retry=2) + stop_verify = stopwait # compat + + def expand(self, argv, cmd=None): + template = argv[0] + p = NamespacedOptionParser(argv[1:]) + for node in multi_args(p, cmd): + self.say(node.expander(template)) + + def help(self, argv, cmd=None): + self.say(__doc__) + + def usage(self): + self.splash() + self.say(USAGE.format(prog_name=self.prog_name)) + + def splash(self): + if not self.nosplash: + c = self.colored + self.note(c.cyan('celery multi v{0}'.format(VERSION_BANNER))) + + def waitexec(self, argv, path=sys.executable): + args = ' '.join([path] + list(argv)) + argstr = shlex.split(from_utf8(args), posix=not IS_WINDOWS) + pipe = Popen(argstr, env=self.env) + self.info(' {0}'.format(' '.join(argstr))) + retcode = pipe.wait() + if retcode < 0: + self.note('* Child was terminated by signal {0}'.format(-retcode)) + return -retcode + elif retcode > 0: + self.note('* Child terminated with errorcode {0}'.format(retcode)) + return retcode + + def error(self, msg=None): + if msg: + self.say(msg) + self.usage() + self.retcode = 1 + return 1 + + def info(self, msg, newline=True): + if self.verbose: + self.note(msg, newline=newline) + + def note(self, msg, newline=True): + if not self.quiet: + self.say(str(msg), newline=newline) + + @cached_property + def colored(self): + return term.colored(enabled=not self.no_color) + + @cached_property + def OK(self): + return str(self.colored.green('OK')) + + @cached_property + def FAILED(self): + return str(self.colored.red('FAILED')) + + @cached_property + def DOWN(self): + return str(self.colored.magenta('DOWN')) + + +def multi_args(p, cmd='celery worker', append='', prefix='', suffix=''): + names = p.values + options = dict(p.options) + passthrough = p.passthrough + ranges = len(names) == 1 + if ranges: + try: + noderange = int(names[0]) + except ValueError: + pass + else: + 
names = [str(n) for n in range(1, noderange + 1)] + prefix = 'celery' + cmd = options.pop('--cmd', cmd) + append = options.pop('--append', append) + hostname = options.pop('--hostname', + options.pop('-n', socket.gethostname())) + prefix = options.pop('--prefix', prefix) or '' + suffix = options.pop('--suffix', suffix) or hostname + if suffix in ('""', "''"): + suffix = '' + + for ns_name, ns_opts in list(items(p.namespaces)): + if ',' in ns_name or (ranges and '-' in ns_name): + for subns in parse_ns_range(ns_name, ranges): + p.namespaces[subns].update(ns_opts) + p.namespaces.pop(ns_name) + + # Numbers in args always refers to the index in the list of names. + # (e.g. `start foo bar baz -c:1` where 1 is foo, 2 is bar, and so on). + for ns_name, ns_opts in list(items(p.namespaces)): + if ns_name.isdigit(): + ns_index = int(ns_name) - 1 + if ns_index < 0: + raise KeyError('Indexes start at 1 got: %r' % (ns_name, )) + try: + p.namespaces[names[ns_index]].update(ns_opts) + except IndexError: + raise KeyError('No node at index %r' % (ns_name, )) + + for name in names: + this_suffix = suffix + if '@' in name: + this_name = options['-n'] = name + nodename, this_suffix = nodesplit(name) + name = nodename + else: + nodename = '%s%s' % (prefix, name) + this_name = options['-n'] = '%s@%s' % (nodename, this_suffix) + expand = abbreviations({'%h': this_name, + '%n': name, + '%N': nodename, + '%d': this_suffix}) + argv = ([expand(cmd)] + + [format_opt(opt, expand(value)) + for opt, value in items(p.optmerge(name, options))] + + [passthrough]) + if append: + argv.append(expand(append)) + yield multi_args_t(this_name, argv, expand, name) + + +class NamespacedOptionParser(object): + + def __init__(self, args): + self.args = args + self.options = OrderedDict() + self.values = [] + self.passthrough = '' + self.namespaces = defaultdict(lambda: OrderedDict()) + + self.parse() + + def parse(self): + rargs = list(self.args) + pos = 0 + while pos < len(rargs): + arg = rargs[pos] + if arg == '--': + self.passthrough = ' '.join(rargs[pos:]) + break + elif arg[0] == '-': + if arg[1] == '-': + self.process_long_opt(arg[2:]) + else: + value = None + if len(rargs) > pos + 1 and rargs[pos + 1][0] != '-': + value = rargs[pos + 1] + pos += 1 + self.process_short_opt(arg[1:], value) + else: + self.values.append(arg) + pos += 1 + + def process_long_opt(self, arg, value=None): + if '=' in arg: + arg, value = arg.split('=', 1) + self.add_option(arg, value, short=False) + + def process_short_opt(self, arg, value=None): + self.add_option(arg, value, short=True) + + def optmerge(self, ns, defaults=None): + if defaults is None: + defaults = self.options + return OrderedDict(defaults, **self.namespaces[ns]) + + def add_option(self, name, value, short=False, ns=None): + prefix = short and '-' or '--' + dest = self.options + if ':' in name: + name, ns = name.split(':') + dest = self.namespaces[ns] + dest[prefix + name] = value + + +def quote(v): + return "\\'".join("'" + p + "'" for p in v.split("'")) + + +def format_opt(opt, value): + if not value: + return opt + if opt.startswith('--'): + return '{0}={1}'.format(opt, value) + return '{0} {1}'.format(opt, value) + + +def parse_ns_range(ns, ranges=False): + ret = [] + for space in ',' in ns and ns.split(',') or [ns]: + if ranges and '-' in space: + start, stop = space.split('-') + ret.extend( + str(n) for n in range(int(start), int(stop) + 1) + ) + else: + ret.append(space) + return ret + + +def abbreviations(mapping): + + def expand(S): + ret = S + if S is not None: + for 
short_opt, long_opt in items(mapping): + ret = ret.replace(short_opt, long_opt) + return ret + + return expand + + +def findsig(args, default=signal.SIGTERM): + for arg in reversed(args): + if len(arg) == 2 and arg[0] == '-': + try: + return int(arg[1]) + except ValueError: + pass + if arg[0] == '-': + maybe_sig = 'SIG' + arg[1:] + if maybe_sig in SIGNAMES: + return getattr(signal, maybe_sig) + return default + + +def _getopt(d, alt): + for opt in alt: + try: + return d[opt] + except KeyError: + pass + raise KeyError(alt[0]) + + +def _setdefaultopt(d, alt, value): + for opt in alt[1:]: + try: + return d[opt] + except KeyError: + pass + return d.setdefault(alt[0], value) + + +if __name__ == '__main__': # pragma: no cover + main() diff --git a/celery/bin/worker.py b/celery/bin/worker.py new file mode 100644 index 0000000..dc04075 --- /dev/null +++ b/celery/bin/worker.py @@ -0,0 +1,270 @@ +# -*- coding: utf-8 -*- +""" + +The :program:`celery worker` command (previously known as ``celeryd``) + +.. program:: celery worker + +.. seealso:: + + See :ref:`preload-options`. + +.. cmdoption:: -c, --concurrency + + Number of child processes processing the queue. The default + is the number of CPUs available on your system. + +.. cmdoption:: -P, --pool + + Pool implementation: + + prefork (default), eventlet, gevent, solo or threads. + +.. cmdoption:: -f, --logfile + + Path to log file. If no logfile is specified, `stderr` is used. + +.. cmdoption:: -l, --loglevel + + Logging level, choose between `DEBUG`, `INFO`, `WARNING`, + `ERROR`, `CRITICAL`, or `FATAL`. + +.. cmdoption:: -n, --hostname + + Set custom hostname, e.g. 'w1.%h'. Expands: %h (hostname), + %n (name) and %d, (domain). + +.. cmdoption:: -B, --beat + + Also run the `celery beat` periodic task scheduler. Please note that + there must only be one instance of this service. + +.. cmdoption:: -Q, --queues + + List of queues to enable for this worker, separated by comma. + By default all configured queues are enabled. + Example: `-Q video,image` + +.. cmdoption:: -I, --include + + Comma separated list of additional modules to import. + Example: -I foo.tasks,bar.tasks + +.. cmdoption:: -s, --schedule + + Path to the schedule database if running with the `-B` option. + Defaults to `celerybeat-schedule`. The extension ".db" may be + appended to the filename. + +.. cmdoption:: -O + + Apply optimization profile. Supported: default, fair + +.. cmdoption:: --scheduler + + Scheduler class to use. Default is celery.beat.PersistentScheduler + +.. cmdoption:: -S, --statedb + + Path to the state database. The extension '.db' may + be appended to the filename. Default: {default} + +.. cmdoption:: -E, --events + + Send events that can be captured by monitors like :program:`celery events`, + `celerymon`, and others. + +.. cmdoption:: --without-gossip + + Do not subscribe to other workers events. + +.. cmdoption:: --without-mingle + + Do not synchronize with other workers at startup. + +.. cmdoption:: --without-heartbeat + + Do not send event heartbeats. + +.. cmdoption:: --heartbeat-interval + + Interval in seconds at which to send worker heartbeat + +.. cmdoption:: --purge + + Purges all waiting tasks before the daemon is started. + **WARNING**: This is unrecoverable, and the tasks will be + deleted from the messaging server. + +.. cmdoption:: --time-limit + + Enables a hard time limit (in seconds int/float) for tasks. + +.. cmdoption:: --soft-time-limit + + Enables a soft time limit (in seconds int/float) for tasks. + +.. 
cmdoption:: --maxtasksperchild + + Maximum number of tasks a pool worker can execute before it's + terminated and replaced by a new worker. + +.. cmdoption:: --pidfile + + Optional file used to store the workers pid. + + The worker will not start if this file already exists + and the pid is still alive. + +.. cmdoption:: --autoscale + + Enable autoscaling by providing + max_concurrency, min_concurrency. Example:: + + --autoscale=10,3 + + (always keep 3 processes, but grow to 10 if necessary) + +.. cmdoption:: --autoreload + + Enable autoreloading. + +.. cmdoption:: --no-execv + + Don't do execv after multiprocessing child fork. + +""" +from __future__ import absolute_import, unicode_literals + +import sys + +from celery import concurrency +from celery.bin.base import Command, Option, daemon_options +from celery.bin.celeryd_detach import detached_celeryd +from celery.five import string_t +from celery.platforms import maybe_drop_privileges +from celery.utils import default_nodename +from celery.utils.log import LOG_LEVELS, mlevel + +__all__ = ['worker', 'main'] + +__MODULE_DOC__ = __doc__ + + +class worker(Command): + """Start worker instance. + + Examples:: + + celery worker --app=proj -l info + celery worker -A proj -l info -Q hipri,lopri + + celery worker -A proj --concurrency=4 + celery worker -A proj --concurrency=1000 -P eventlet + + celery worker --autoscale=10,0 + """ + doc = __MODULE_DOC__ # parse help from this too + namespace = 'celeryd' + enable_config_from_cmdline = True + supports_args = False + + def run_from_argv(self, prog_name, argv=None, command=None): + command = sys.argv[0] if command is None else command + argv = sys.argv[1:] if argv is None else argv + # parse options before detaching so errors can be handled. + options, args = self.prepare_args( + *self.parse_options(prog_name, argv, command)) + self.maybe_detach([command] + argv) + return self(*args, **options) + + def maybe_detach(self, argv, dopts=['-D', '--detach']): + if any(arg in argv for arg in dopts): + argv = [v for v in argv if v not in dopts] + # will never return + detached_celeryd(self.app).execute_from_commandline(argv) + raise SystemExit(0) + + def run(self, hostname=None, pool_cls=None, app=None, uid=None, gid=None, + loglevel=None, logfile=None, pidfile=None, state_db=None, + **kwargs): + maybe_drop_privileges(uid=uid, gid=gid) + # Pools like eventlet/gevent needs to patch libs as early + # as possible. + pool_cls = (concurrency.get_implementation(pool_cls) or + self.app.conf.CELERYD_POOL) + if self.app.IS_WINDOWS and kwargs.get('beat'): + self.die('-B option does not work on Windows. ' + 'Please run celery beat as a separate service.') + hostname = self.host_format(default_nodename(hostname)) + if loglevel: + try: + loglevel = mlevel(loglevel) + except KeyError: # pragma: no cover + self.die('Unknown level {0!r}. Please use one of {1}.'.format( + loglevel, '|'.join( + l for l in LOG_LEVELS if isinstance(l, string_t)))) + + return self.app.Worker( + hostname=hostname, pool_cls=pool_cls, loglevel=loglevel, + logfile=logfile, # node format handled by celery.app.log.setup + pidfile=self.node_format(pidfile, hostname), + state_db=self.node_format(state_db, hostname), **kwargs + ).start() + + def with_pool_option(self, argv): + # this command support custom pools + # that may have to be loaded as early as possible. 
+ return (['-P'], ['--pool']) + + def get_options(self): + conf = self.app.conf + return ( + Option('-c', '--concurrency', + default=conf.CELERYD_CONCURRENCY, type='int'), + Option('-P', '--pool', default=conf.CELERYD_POOL, dest='pool_cls'), + Option('--purge', '--discard', default=False, action='store_true'), + Option('-l', '--loglevel', default=conf.CELERYD_LOG_LEVEL), + Option('-n', '--hostname'), + Option('-B', '--beat', action='store_true'), + Option('-s', '--schedule', dest='schedule_filename', + default=conf.CELERYBEAT_SCHEDULE_FILENAME), + Option('--scheduler', dest='scheduler_cls'), + Option('-S', '--statedb', + default=conf.CELERYD_STATE_DB, dest='state_db'), + Option('-E', '--events', default=conf.CELERY_SEND_EVENTS, + action='store_true', dest='send_events'), + Option('--time-limit', type='float', dest='task_time_limit', + default=conf.CELERYD_TASK_TIME_LIMIT), + Option('--soft-time-limit', dest='task_soft_time_limit', + default=conf.CELERYD_TASK_SOFT_TIME_LIMIT, type='float'), + Option('--maxtasksperchild', dest='max_tasks_per_child', + default=conf.CELERYD_MAX_TASKS_PER_CHILD, type='int'), + Option('--queues', '-Q', default=[]), + Option('--exclude-queues', '-X', default=[]), + Option('--include', '-I', default=[]), + Option('--autoscale'), + Option('--autoreload', action='store_true'), + Option('--no-execv', action='store_true', default=False), + Option('--without-gossip', action='store_true', default=False), + Option('--without-mingle', action='store_true', default=False), + Option('--without-heartbeat', action='store_true', default=False), + Option('--heartbeat-interval', type='int'), + Option('-O', dest='optimization'), + Option('-D', '--detach', action='store_true'), + ) + daemon_options() + tuple(self.app.user_options['worker']) + + +def main(app=None): + # Fix for setuptools generated scripts, so that it will + # work with multiprocessing fork emulation. + # (see multiprocessing.forking.get_preparation_data()) + if __name__ != '__main__': # pragma: no cover + sys.modules['__main__'] = sys.modules[__name__] + from billiard import freeze_support + freeze_support() + worker(app=app).execute_from_commandline() + + +if __name__ == '__main__': # pragma: no cover + main() diff --git a/celery/bootsteps.py b/celery/bootsteps.py new file mode 100644 index 0000000..4471a4c --- /dev/null +++ b/celery/bootsteps.py @@ -0,0 +1,422 @@ +# -*- coding: utf-8 -*- +""" + celery.bootsteps + ~~~~~~~~~~~~~~~~ + + A directed acyclic graph of reusable components. 
+ +""" +from __future__ import absolute_import, unicode_literals + +from collections import deque +from threading import Event + +from kombu.common import ignore_errors +from kombu.utils import symbol_by_name + +from .datastructures import DependencyGraph, GraphFormatter +from .five import values, with_metaclass +from .utils.imports import instantiate, qualname +from .utils.log import get_logger + +try: + from greenlet import GreenletExit + IGNORE_ERRORS = (GreenletExit, ) +except ImportError: # pragma: no cover + IGNORE_ERRORS = () + +__all__ = ['Blueprint', 'Step', 'StartStopStep', 'ConsumerStep'] + +#: States +RUN = 0x1 +CLOSE = 0x2 +TERMINATE = 0x3 + +logger = get_logger(__name__) +debug = logger.debug + + +def _pre(ns, fmt): + return '| {0}: {1}'.format(ns.alias, fmt) + + +def _label(s): + return s.name.rsplit('.', 1)[-1] + + +class StepFormatter(GraphFormatter): + """Graph formatter for :class:`Blueprint`.""" + + blueprint_prefix = '⧉' + conditional_prefix = '∘' + blueprint_scheme = { + 'shape': 'parallelogram', + 'color': 'slategray4', + 'fillcolor': 'slategray3', + } + + def label(self, step): + return step and '{0}{1}'.format( + self._get_prefix(step), + (step.label or _label(step)).encode('utf-8', 'ignore'), + ) + + def _get_prefix(self, step): + if step.last: + return self.blueprint_prefix + if step.conditional: + return self.conditional_prefix + return '' + + def node(self, obj, **attrs): + scheme = self.blueprint_scheme if obj.last else self.node_scheme + return self.draw_node(obj, scheme, attrs) + + def edge(self, a, b, **attrs): + if a.last: + attrs.update(arrowhead='none', color='darkseagreen3') + return self.draw_edge(a, b, self.edge_scheme, attrs) + + +class Blueprint(object): + """Blueprint containing bootsteps that can be applied to objects. + + :keyword steps: List of steps. + :keyword name: Set explicit name for this blueprint. + :keyword app: Set the Celery app for this blueprint. + :keyword on_start: Optional callback applied after blueprint start. + :keyword on_close: Optional callback applied before blueprint close. + :keyword on_stopped: Optional callback applied after blueprint stopped. 
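
    A minimal usage sketch (illustrative only; ``InfoStep`` is a made-up
    step and the ``Pool`` requirement is an assumption about the worker
    blueprint) of how applications normally feed steps into a blueprint
    through the ``app.steps`` registry consumed by :meth:`claim_steps`::

        from celery import Celery, bootsteps

        class InfoStep(bootsteps.StartStopStep):
            requires = ('celery.worker.components:Pool', )

            def start(self, worker):
                print('worker starting')

            def stop(self, worker):
                print('worker stopping')

        app = Celery(broker='amqp://')
        app.steps['worker'].add(InfoStep)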
+ + """ + GraphFormatter = StepFormatter + + name = None + state = None + started = 0 + default_steps = set() + state_to_name = { + 0: 'initializing', + RUN: 'running', + CLOSE: 'closing', + TERMINATE: 'terminating', + } + + def __init__(self, steps=None, name=None, app=None, + on_start=None, on_close=None, on_stopped=None): + self.app = app + self.name = name or self.name or qualname(type(self)) + self.types = set(steps or []) | set(self.default_steps) + self.on_start = on_start + self.on_close = on_close + self.on_stopped = on_stopped + self.shutdown_complete = Event() + self.steps = {} + + def start(self, parent): + self.state = RUN + if self.on_start: + self.on_start() + for i, step in enumerate(s for s in parent.steps if s is not None): + self._debug('Starting %s', step.alias) + self.started = i + 1 + step.start(parent) + debug('^-- substep ok') + + def human_state(self): + return self.state_to_name[self.state or 0] + + def info(self, parent): + info = {} + for step in parent.steps: + info.update(step.info(parent) or {}) + return info + + def close(self, parent): + if self.on_close: + self.on_close() + self.send_all(parent, 'close', 'closing', reverse=False) + + def restart(self, parent, method='stop', + description='restarting', propagate=False): + self.send_all(parent, method, description, propagate=propagate) + + def send_all(self, parent, method, + description=None, reverse=True, propagate=True, args=()): + description = description or method.replace('_', ' ') + steps = reversed(parent.steps) if reverse else parent.steps + for step in steps: + if step: + fun = getattr(step, method, None) + if fun is not None: + self._debug('%s %s...', + description.capitalize(), step.alias) + try: + fun(parent, *args) + except Exception as exc: + if propagate: + raise + logger.error( + 'Error on %s %s: %r', + description, step.alias, exc, exc_info=1, + ) + + def stop(self, parent, close=True, terminate=False): + what = 'terminating' if terminate else 'stopping' + if self.state in (CLOSE, TERMINATE): + return + + if self.state != RUN or self.started != len(parent.steps): + # Not fully started, can safely exit. + self.state = TERMINATE + self.shutdown_complete.set() + return + self.close(parent) + self.state = CLOSE + + self.restart( + parent, 'terminate' if terminate else 'stop', + description=what, propagate=False, + ) + + if self.on_stopped: + self.on_stopped() + self.state = TERMINATE + self.shutdown_complete.set() + + def join(self, timeout=None): + try: + # Will only get here if running green, + # makes sure all greenthreads have exited. + self.shutdown_complete.wait(timeout=timeout) + except IGNORE_ERRORS: + pass + + def apply(self, parent, **kwargs): + """Apply the steps in this blueprint to an object. + + This will apply the ``__init__`` and ``include`` methods + of each step, with the object as argument:: + + step = Step(obj) + ... + step.include(obj) + + For :class:`StartStopStep` the services created + will also be added to the objects ``steps`` attribute. 
+ + """ + self._debug('Preparing bootsteps.') + order = self.order = [] + steps = self.steps = self.claim_steps() + + self._debug('Building graph...') + for S in self._finalize_steps(steps): + step = S(parent, **kwargs) + steps[step.name] = step + order.append(step) + self._debug('New boot order: {%s}', + ', '.join(s.alias for s in self.order)) + for step in order: + step.include(parent) + return self + + def connect_with(self, other): + self.graph.adjacent.update(other.graph.adjacent) + self.graph.add_edge(type(other.order[0]), type(self.order[-1])) + + def __getitem__(self, name): + return self.steps[name] + + def _find_last(self): + return next((C for C in values(self.steps) if C.last), None) + + def _firstpass(self, steps): + for step in values(steps): + step.requires = [symbol_by_name(dep) for dep in step.requires] + stream = deque(step.requires for step in values(steps)) + while stream: + for node in stream.popleft(): + node = symbol_by_name(node) + if node.name not in self.steps: + steps[node.name] = node + stream.append(node.requires) + + def _finalize_steps(self, steps): + last = self._find_last() + self._firstpass(steps) + it = ((C, C.requires) for C in values(steps)) + G = self.graph = DependencyGraph( + it, formatter=self.GraphFormatter(root=last), + ) + if last: + for obj in G: + if obj != last: + G.add_edge(last, obj) + try: + return G.topsort() + except KeyError as exc: + raise KeyError('unknown bootstep: %s' % exc) + + def claim_steps(self): + return dict(self.load_step(step) for step in self._all_steps()) + + def _all_steps(self): + return self.types | self.app.steps[self.name.lower()] + + def load_step(self, step): + step = symbol_by_name(step) + return step.name, step + + def _debug(self, msg, *args): + return debug(_pre(self, msg), *args) + + @property + def alias(self): + return _label(self) + + +class StepType(type): + """Metaclass for steps.""" + + def __new__(cls, name, bases, attrs): + module = attrs.get('__module__') + qname = '{0}.{1}'.format(module, name) if module else name + attrs.update( + __qualname__=qname, + name=attrs.get('name') or qname, + ) + return super(StepType, cls).__new__(cls, name, bases, attrs) + + def __str__(self): + return self.name + + def __repr__(self): + return 'step:{0.name}{{{0.requires!r}}}'.format(self) + + +@with_metaclass(StepType) +class Step(object): + """A Bootstep. + + The :meth:`__init__` method is called when the step + is bound to a parent object, and can as such be used + to initialize attributes in the parent object at + parent instantiation-time. + + """ + + #: Optional step name, will use qualname if not specified. + name = None + + #: Optional short name used for graph outputs and in logs. + label = None + + #: Set this to true if the step is enabled based on some condition. + conditional = False + + #: List of other steps that that must be started before this step. + #: Note that all dependencies must be in the same blueprint. + requires = () + + #: This flag is reserved for the workers Consumer, + #: since it is required to always be started last. + #: There can only be one object marked last + #: in every blueprint. + last = False + + #: This provides the default for :meth:`include_if`. 
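    #: Set it to :const:`False` to have the step skipped: ``include_if``
    #: then returns a false value and the step is never created or added
    #: to the parent.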
+ enabled = True + + def __init__(self, parent, **kwargs): + pass + + def include_if(self, parent): + """An optional predicate that decides whether this + step should be created.""" + return self.enabled + + def instantiate(self, name, *args, **kwargs): + return instantiate(name, *args, **kwargs) + + def _should_include(self, parent): + if self.include_if(parent): + return True, self.create(parent) + return False, None + + def include(self, parent): + return self._should_include(parent)[0] + + def create(self, parent): + """Create the step.""" + pass + + def __repr__(self): + return ''.format(self) + + @property + def alias(self): + return self.label or _label(self) + + def info(self, obj): + pass + + +class StartStopStep(Step): + + #: Optional obj created by the :meth:`create` method. + #: This is used by :class:`StartStopStep` to keep the + #: original service object. + obj = None + + def start(self, parent): + if self.obj: + return self.obj.start() + + def stop(self, parent): + if self.obj: + return self.obj.stop() + + def close(self, parent): + pass + + def terminate(self, parent): + if self.obj: + return getattr(self.obj, 'terminate', self.obj.stop)() + + def include(self, parent): + inc, ret = self._should_include(parent) + if inc: + self.obj = ret + parent.steps.append(self) + return inc + + +class ConsumerStep(StartStopStep): + requires = ('celery.worker.consumer:Connection', ) + consumers = None + + def get_consumers(self, channel): + raise NotImplementedError('missing get_consumers') + + def start(self, c): + channel = c.connection.channel() + self.consumers = self.get_consumers(channel) + for consumer in self.consumers or []: + consumer.consume() + + def stop(self, c): + self._close(c, True) + + def shutdown(self, c): + self._close(c, False) + + def _close(self, c, cancel_consumers=True): + channels = set() + for consumer in self.consumers or []: + if cancel_consumers: + ignore_errors(c.connection, consumer.cancel) + if consumer.channel: + channels.add(consumer.channel) + for channel in channels: + ignore_errors(c.connection, channel.close) diff --git a/celery/canvas.py b/celery/canvas.py new file mode 100644 index 0000000..7b330dc --- /dev/null +++ b/celery/canvas.py @@ -0,0 +1,664 @@ +# -*- coding: utf-8 -*- +""" + celery.canvas + ~~~~~~~~~~~~~ + + Composing task workflows. + + Documentation for some of these types are in :mod:`celery`. + You should import these from :mod:`celery` and not this module. + + +""" +from __future__ import absolute_import + +from collections import MutableSequence +from copy import deepcopy +from functools import partial as _partial, reduce +from operator import itemgetter +from itertools import chain as _chain + +from kombu.utils import cached_property, fxrange, kwdict, reprcall, uuid + +from celery._state import current_app +from celery.utils.functional import ( + maybe_list, is_list, regen, + chunks as _chunks, +) +from celery.utils.text import truncate + +__all__ = ['Signature', 'chain', 'xmap', 'xstarmap', 'chunks', + 'group', 'chord', 'signature', 'maybe_signature'] + + +class _getitem_property(object): + """Attribute -> dict key descriptor. + + The target object must support ``__getitem__``, + and optionally ``__setitem__``. + + Example: + + >>> from collections import defaultdict + + >>> class Me(dict): + ... deep = defaultdict(dict) + ... + ... foo = _getitem_property('foo') + ... 
deep_thing = _getitem_property('deep.thing') + + + >>> me = Me() + >>> me.foo + None + + >>> me.foo = 10 + >>> me.foo + 10 + >>> me['foo'] + 10 + + >>> me.deep_thing = 42 + >>> me.deep_thing + 42 + >>> me.deep + defaultdict(, {'thing': 42}) + + """ + + def __init__(self, keypath): + path, _, self.key = keypath.rpartition('.') + self.path = path.split('.') if path else None + + def _path(self, obj): + return (reduce(lambda d, k: d[k], [obj] + self.path) if self.path + else obj) + + def __get__(self, obj, type=None): + if obj is None: + return type + return self._path(obj).get(self.key) + + def __set__(self, obj, value): + self._path(obj)[self.key] = value + + +def maybe_unroll_group(g): + """Unroll group with only one member.""" + # Issue #1656 + try: + size = len(g.tasks) + except TypeError: + try: + size = g.tasks.__length_hint__() + except (AttributeError, TypeError): + pass + else: + return list(g.tasks)[0] if size == 1 else g + else: + return g.tasks[0] if size == 1 else g + + +class Signature(dict): + """Class that wraps the arguments and execution options + for a single task invocation. + + Used as the parts in a :class:`group` and other constructs, + or to pass tasks around as callbacks while being compatible + with serializers with a strict type subset. + + :param task: Either a task class/instance, or the name of a task. + :keyword args: Positional arguments to apply. + :keyword kwargs: Keyword arguments to apply. + :keyword options: Additional options to :meth:`Task.apply_async`. + + Note that if the first argument is a :class:`dict`, the other + arguments will be ignored and the values in the dict will be used + instead. + + >>> s = signature('tasks.add', args=(2, 2)) + >>> signature(s) + {'task': 'tasks.add', args=(2, 2), kwargs={}, options={}} + + """ + TYPES = {} + _app = _type = None + + @classmethod + def register_type(cls, subclass, name=None): + cls.TYPES[name or subclass.__name__] = subclass + return subclass + + @classmethod + def from_dict(self, d, app=None): + typ = d.get('subtask_type') + if typ: + return self.TYPES[typ].from_dict(kwdict(d), app=app) + return Signature(d, app=app) + + def __init__(self, task=None, args=None, kwargs=None, options=None, + type=None, subtask_type=None, immutable=False, + app=None, **ex): + self._app = app + init = dict.__init__ + + if isinstance(task, dict): + return init(self, task) # works like dict(d) + + # Also supports using task class/instance instead of string name. + try: + task_name = task.name + except AttributeError: + task_name = task + else: + self._type = task + + init(self, + task=task_name, args=tuple(args or ()), + kwargs=kwargs or {}, + options=dict(options or {}, **ex), + subtask_type=subtask_type, + immutable=immutable) + + def __call__(self, *partial_args, **partial_kwargs): + args, kwargs, _ = self._merge(partial_args, partial_kwargs, None) + return self.type(*args, **kwargs) + + def delay(self, *partial_args, **partial_kwargs): + return self.apply_async(partial_args, partial_kwargs) + + def apply(self, args=(), kwargs={}, **options): + """Apply this task locally.""" + # For callbacks: extra args are prepended to the stored args. 
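+ # e.g. a signature stored with args=(2, ) that is applied with (4, )
+ # ends up calling the task as task(4, 2) -- see _merge() below.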
+ args, kwargs, options = self._merge(args, kwargs, options) + return self.type.apply(args, kwargs, **options) + + def _merge(self, args=(), kwargs={}, options={}): + if self.immutable: + return (self.args, self.kwargs, + dict(self.options, **options) if options else self.options) + return (tuple(args) + tuple(self.args) if args else self.args, + dict(self.kwargs, **kwargs) if kwargs else self.kwargs, + dict(self.options, **options) if options else self.options) + + def clone(self, args=(), kwargs={}, **opts): + # need to deepcopy options so origins links etc. is not modified. + if args or kwargs or opts: + args, kwargs, opts = self._merge(args, kwargs, opts) + else: + args, kwargs, opts = self.args, self.kwargs, self.options + s = Signature.from_dict({'task': self.task, 'args': tuple(args), + 'kwargs': kwargs, 'options': deepcopy(opts), + 'subtask_type': self.subtask_type, + 'immutable': self.immutable}, app=self._app) + s._type = self._type + return s + partial = clone + + def freeze(self, _id=None, group_id=None, chord=None): + opts = self.options + try: + tid = opts['task_id'] + except KeyError: + tid = opts['task_id'] = _id or uuid() + if 'reply_to' not in opts: + opts['reply_to'] = self.app.oid + if group_id: + opts['group_id'] = group_id + if chord: + opts['chord'] = chord + return self.AsyncResult(tid) + _freeze = freeze + + def replace(self, args=None, kwargs=None, options=None): + s = self.clone() + if args is not None: + s.args = args + if kwargs is not None: + s.kwargs = kwargs + if options is not None: + s.options = options + return s + + def set(self, immutable=None, **options): + if immutable is not None: + self.set_immutable(immutable) + self.options.update(options) + return self + + def set_immutable(self, immutable): + self.immutable = immutable + + def apply_async(self, args=(), kwargs={}, **options): + try: + _apply = self._apply_async + except IndexError: # no tasks for chain, etc to find type + return + # For callbacks: extra args are prepended to the stored args. 
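+ # (unless the signature is immutable, in which case the stored
+ # args/kwargs are kept as-is and only the options are merged).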
+ if args or kwargs or options: + args, kwargs, options = self._merge(args, kwargs, options) + else: + args, kwargs, options = self.args, self.kwargs, self.options + return _apply(args, kwargs, **options) + + def append_to_list_option(self, key, value): + items = self.options.setdefault(key, []) + if not isinstance(items, MutableSequence): + items = self.options[key] = [items] + if value not in items: + items.append(value) + return value + + def link(self, callback): + return self.append_to_list_option('link', callback) + + def link_error(self, errback): + return self.append_to_list_option('link_error', errback) + + def flatten_links(self): + return list(_chain.from_iterable(_chain( + [[self]], + (link.flatten_links() + for link in maybe_list(self.options.get('link')) or []) + ))) + + def __or__(self, other): + if isinstance(other, group): + other = maybe_unroll_group(other) + if not isinstance(self, chain) and isinstance(other, chain): + return chain((self, ) + other.tasks, app=self._app) + elif isinstance(other, chain): + return chain(*self.tasks + other.tasks, app=self._app) + elif isinstance(other, Signature): + if isinstance(self, chain): + return chain(*self.tasks + (other, ), app=self._app) + return chain(self, other, app=self._app) + return NotImplemented + + def __deepcopy__(self, memo): + memo[id(self)] = self + return dict(self) + + def __invert__(self): + return self.apply_async().get() + + def __reduce__(self): + # for serialization, the task type is lazily loaded, + # and not stored in the dict itself. + return subtask, (dict(self), ) + + def reprcall(self, *args, **kwargs): + args, kwargs, _ = self._merge(args, kwargs, {}) + return reprcall(self['task'], args, kwargs) + + def election(self): + type = self.type + app = type.app + tid = self.options.get('task_id') or uuid() + + with app.producer_or_acquire(None) as P: + props = type.backend.on_task_call(P, tid) + app.control.election(tid, 'task', self.clone(task_id=tid, **props), + connection=P.connection) + return type.AsyncResult(tid) + + def __repr__(self): + return self.reprcall() + + @cached_property + def type(self): + return self._type or self.app.tasks[self['task']] + + @cached_property + def app(self): + return self._app or current_app + + @cached_property + def AsyncResult(self): + try: + return self.type.AsyncResult + except KeyError: # task not registered + return self.app.AsyncResult + + @cached_property + def _apply_async(self): + try: + return self.type.apply_async + except KeyError: + return _partial(self.app.send_task, self['task']) + id = _getitem_property('options.task_id') + task = _getitem_property('task') + args = _getitem_property('args') + kwargs = _getitem_property('kwargs') + options = _getitem_property('options') + subtask_type = _getitem_property('subtask_type') + immutable = _getitem_property('immutable') + + +@Signature.register_type +class chain(Signature): + + def __init__(self, *tasks, **options): + tasks = (regen(tasks[0]) if len(tasks) == 1 and is_list(tasks[0]) + else tasks) + Signature.__init__( + self, 'celery.chain', (), {'tasks': tasks}, **options + ) + self.tasks = tasks + self.subtask_type = 'chain' + + def __call__(self, *args, **kwargs): + if self.tasks: + return self.apply_async(args, kwargs) + + @classmethod + def from_dict(self, d, app=None): + tasks = d['kwargs']['tasks'] + if d['args'] and tasks: + # partial args passed on to first task in chain (Issue #1057). 
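+ # e.g. if the first task is add.s(2) (add being any two-argument
+ # task), calling the chain with (4, ) executes it as add(4, 2).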
+ tasks[0]['args'] = tasks[0]._merge(d['args'])[0] + return chain(*d['kwargs']['tasks'], app=app, **kwdict(d['options'])) + + @property + def type(self): + try: + return self._type or self.tasks[0].type.app.tasks['celery.chain'] + except KeyError: + return self.app.tasks['celery.chain'] + + def __repr__(self): + return ' | '.join(repr(t) for t in self.tasks) + + +class _basemap(Signature): + _task_name = None + _unpack_args = itemgetter('task', 'it') + + def __init__(self, task, it, **options): + Signature.__init__( + self, self._task_name, (), + {'task': task, 'it': regen(it)}, immutable=True, **options + ) + + def apply_async(self, args=(), kwargs={}, **opts): + # need to evaluate generators + task, it = self._unpack_args(self.kwargs) + return self.type.apply_async( + (), {'task': task, 'it': list(it)}, **opts + ) + + @classmethod + def from_dict(cls, d, app=None): + return cls(*cls._unpack_args(d['kwargs']), app=app, **d['options']) + + +@Signature.register_type +class xmap(_basemap): + _task_name = 'celery.map' + + def __repr__(self): + task, it = self._unpack_args(self.kwargs) + return '[{0}(x) for x in {1}]'.format(task.task, + truncate(repr(it), 100)) + + +@Signature.register_type +class xstarmap(_basemap): + _task_name = 'celery.starmap' + + def __repr__(self): + task, it = self._unpack_args(self.kwargs) + return '[{0}(*x) for x in {1}]'.format(task.task, + truncate(repr(it), 100)) + + +@Signature.register_type +class chunks(Signature): + _unpack_args = itemgetter('task', 'it', 'n') + + def __init__(self, task, it, n, **options): + Signature.__init__( + self, 'celery.chunks', (), + {'task': task, 'it': regen(it), 'n': n}, + immutable=True, **options + ) + + @classmethod + def from_dict(self, d, app=None): + return chunks(*self._unpack_args(d['kwargs']), app=app, **d['options']) + + def apply_async(self, args=(), kwargs={}, **opts): + return self.group().apply_async(args, kwargs, **opts) + + def __call__(self, **options): + return self.group()(**options) + + def group(self): + # need to evaluate generators + task, it, n = self._unpack_args(self.kwargs) + return group((xstarmap(task, part, app=self._app) + for part in _chunks(iter(it), n)), + app=self._app) + + @classmethod + def apply_chunks(cls, task, it, n, app=None): + return cls(task, it, n, app=app)() + + +def _maybe_group(tasks): + if isinstance(tasks, group): + tasks = list(tasks.tasks) + elif isinstance(tasks, Signature): + tasks = [tasks] + else: + tasks = regen(tasks) + return tasks + + +def _maybe_clone(tasks, app): + return [s.clone() if isinstance(s, Signature) else signature(s, app=app) + for s in tasks] + + +@Signature.register_type +class group(Signature): + + def __init__(self, *tasks, **options): + if len(tasks) == 1: + tasks = _maybe_group(tasks[0]) + Signature.__init__( + self, 'celery.group', (), {'tasks': tasks}, **options + ) + self.tasks, self.subtask_type = tasks, 'group' + + @classmethod + def from_dict(self, d, app=None): + tasks = d['kwargs']['tasks'] + if d['args'] and tasks: + # partial args passed on to all tasks in the group (Issue #1057). 
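+ # e.g. for a group of add.s(2) and add.s(4) (add being any
+ # two-argument task), calling the group with (10, ) executes
+ # add(10, 2) and add(10, 4).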
+ for task in tasks: + task['args'] = task._merge(d['args'])[0] + return group(tasks, app=app, **kwdict(d['options'])) + + def apply_async(self, args=(), kwargs=None, add_to_parent=True, **options): + tasks = _maybe_clone(self.tasks, app=self._app) + if not tasks: + return self.freeze() + type = self.type + return type(*type.prepare(dict(self.options, **options), tasks, args), + add_to_parent=add_to_parent) + + def set_immutable(self, immutable): + for task in self.tasks: + task.set_immutable(immutable) + + def link(self, sig): + # Simply link to first task + sig = sig.clone().set(immutable=True) + return self.tasks[0].link(sig) + + def link_error(self, sig): + sig = sig.clone().set(immutable=True) + return self.tasks[0].link_error(sig) + + def apply(self, *args, **kwargs): + if not self.tasks: + return self.freeze() # empty group returns GroupResult + return Signature.apply(self, *args, **kwargs) + + def __call__(self, *partial_args, **options): + return self.apply_async(partial_args, **options) + + def freeze(self, _id=None, group_id=None, chord=None): + opts = self.options + try: + gid = opts['task_id'] + except KeyError: + gid = opts['task_id'] = uuid() + if group_id: + opts['group_id'] = group_id + if chord: + opts['chord'] = group_id + new_tasks, results = [], [] + for task in self.tasks: + task = maybe_signature(task, app=self._app).clone() + results.append(task.freeze(group_id=group_id, chord=chord)) + new_tasks.append(task) + self.tasks = self.kwargs['tasks'] = new_tasks + return self.app.GroupResult(gid, results) + _freeze = freeze + + def skew(self, start=1.0, stop=None, step=1.0): + it = fxrange(start, stop, step, repeatlast=True) + for task in self.tasks: + task.set(countdown=next(it)) + return self + + def __iter__(self): + return iter(self.tasks) + + def __repr__(self): + return repr(self.tasks) + + @property + def type(self): + if self._type: + return self._type + # taking the app from the first task in the list, there may be a + # better solution for this, e.g. to consolidate tasks with the same + # app and apply them in batches. + app = self._app if self._app else self.tasks[0].type.app + return app.tasks[self['task']] + + +@Signature.register_type +class chord(Signature): + + def __init__(self, header, body=None, task='celery.chord', + args=(), kwargs={}, **options): + Signature.__init__( + self, task, args, + dict(kwargs, header=_maybe_group(header), + body=maybe_signature(body, app=self._app)), **options + ) + self.subtask_type = 'chord' + + def freeze(self, _id=None, group_id=None, chord=None): + return self.body.freeze(_id, group_id=group_id, chord=chord) + + @classmethod + def from_dict(self, d, app=None): + args, d['kwargs'] = self._unpack_args(**kwdict(d['kwargs'])) + return self(*args, app=app, **kwdict(d)) + + @staticmethod + def _unpack_args(header=None, body=None, **kwargs): + # Python signatures are better at extracting keys from dicts + # than manually popping things off. 
+ return (header, body), kwargs + + @property + def type(self): + if self._type: + return self._type + # we will be able to fix this mess in 3.2 when we no longer + # require an actual task implementation for chord/group + if self._app: + app = self._app + else: + try: + app = self.tasks[0].type.app + except IndexError: + app = self.body.type.app + return app.tasks['celery.chord'] + + def apply_async(self, args=(), kwargs={}, task_id=None, + producer=None, publisher=None, connection=None, + router=None, result_cls=None, **options): + body = kwargs.get('body') or self.kwargs['body'] + kwargs = dict(self.kwargs, **kwargs) + body = body.clone(**options) + + _chord = self.type + if _chord.app.conf.CELERY_ALWAYS_EAGER: + return self.apply((), kwargs, task_id=task_id, **options) + res = body.freeze(task_id) + parent = _chord(self.tasks, body, args, **options) + res.parent = parent + return res + + def __call__(self, body=None, **options): + return self.apply_async((), {'body': body} if body else {}, **options) + + def clone(self, *args, **kwargs): + s = Signature.clone(self, *args, **kwargs) + # need to make copy of body + try: + s.kwargs['body'] = s.kwargs['body'].clone() + except (AttributeError, KeyError): + pass + return s + + def link(self, callback): + self.body.link(callback) + return callback + + def link_error(self, errback): + self.body.link_error(errback) + return errback + + def set_immutable(self, immutable): + # changes mutability of header only, not callback. + for task in self.tasks: + task.set_immutable(immutable) + + def __repr__(self): + if self.body: + return self.body.reprcall(self.tasks) + return ''.format(self) + + tasks = _getitem_property('kwargs.header') + body = _getitem_property('kwargs.body') + + +def signature(varies, *args, **kwargs): + if isinstance(varies, dict): + if isinstance(varies, Signature): + return varies.clone() + return Signature.from_dict(varies) + return Signature(varies, *args, **kwargs) +subtask = signature # XXX compat + + +def maybe_signature(d, app=None): + if d is not None: + if isinstance(d, dict): + if not isinstance(d, Signature): + return signature(d, app=app) + elif isinstance(d, list): + return [maybe_signature(s, app=app) for s in d] + if app is not None: + d._app = app + return d + +maybe_subtask = maybe_signature # XXX compat diff --git a/celery/concurrency/__init__.py b/celery/concurrency/__init__.py new file mode 100644 index 0000000..c58fdbc --- /dev/null +++ b/celery/concurrency/__init__.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +""" + celery.concurrency + ~~~~~~~~~~~~~~~~~~ + + Pool implementation abstract factory, and alias definitions. + +""" +from __future__ import absolute_import + +# Import from kombu directly as it's used +# early in the import stage, where celery.utils loads +# too much (e.g. 
for eventlet patching) +from kombu.utils import symbol_by_name + +__all__ = ['get_implementation'] + +ALIASES = { + 'prefork': 'celery.concurrency.prefork:TaskPool', + 'eventlet': 'celery.concurrency.eventlet:TaskPool', + 'gevent': 'celery.concurrency.gevent:TaskPool', + 'threads': 'celery.concurrency.threads:TaskPool', + 'solo': 'celery.concurrency.solo:TaskPool', + 'processes': 'celery.concurrency.prefork:TaskPool', # XXX compat alias +} + + +def get_implementation(cls): + return symbol_by_name(cls, ALIASES) diff --git a/celery/concurrency/asynpool.py b/celery/concurrency/asynpool.py new file mode 100644 index 0000000..0cb3fcf --- /dev/null +++ b/celery/concurrency/asynpool.py @@ -0,0 +1,1225 @@ +# -*- coding: utf-8 -*- +""" + celery.concurrency.asynpool + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + .. note:: + + This module will be moved soon, so don't use it directly. + + Non-blocking version of :class:`multiprocessing.Pool`. + + This code deals with three major challenges: + + 1) Starting up child processes and keeping them running. + 2) Sending jobs to the processes and receiving results back. + 3) Safely shutting down this system. + +""" +from __future__ import absolute_import + +import errno +import os +import select +import socket +import struct +import sys +import time + +from collections import deque, namedtuple +from io import BytesIO +from pickle import HIGHEST_PROTOCOL +from time import sleep +from weakref import WeakValueDictionary, ref + +from amqp.utils import promise +from billiard.pool import RUN, TERMINATE, ACK, NACK, WorkersJoined +from billiard import pool as _pool +from billiard.compat import buf_t, setblocking, isblocking +from billiard.einfo import ExceptionInfo +from billiard.queues import _SimpleQueue +from kombu.async import READ, WRITE, ERR +from kombu.serialization import pickle as _pickle +from kombu.utils import fxrange +from kombu.utils.compat import get_errno +from kombu.utils.eventio import SELECT_BAD_FD +from celery.five import Counter, items, values +from celery.utils.log import get_logger +from celery.utils.text import truncate +from celery.worker import state as worker_state + +try: + from _billiard import read as __read__ + from struct import unpack_from as _unpack_from + memoryview = memoryview + readcanbuf = True + + if sys.version_info[0] == 2 and sys.version_info < (2, 7, 6): + + def unpack_from(fmt, view, _unpack_from=_unpack_from): # noqa + return _unpack_from(fmt, view.tobytes()) # <- memoryview + else: + # unpack_from supports memoryview in 2.7.6 and 3.3+ + unpack_from = _unpack_from # noqa + +except (ImportError, NameError): # pragma: no cover + + def __read__(fd, buf, size, read=os.read): # noqa + chunk = read(fd, size) + n = len(chunk) + if n != 0: + buf.write(chunk) + return n + readcanbuf = False # noqa + + def unpack_from(fmt, iobuf, unpack=struct.unpack): # noqa + return unpack(fmt, iobuf.getvalue()) # <-- BytesIO + + +logger = get_logger(__name__) +error, debug = logger.error, logger.debug + +UNAVAIL = frozenset([errno.EAGAIN, errno.EINTR]) + +#: Constant sent by child process when started (ready to accept work) +WORKER_UP = 15 + +#: A process must have started before this timeout (in secs.) expires. +PROC_ALIVE_TIMEOUT = 4.0 + +SCHED_STRATEGY_PREFETCH = 1 +SCHED_STRATEGY_FAIR = 4 + +SCHED_STRATEGIES = { + None: SCHED_STRATEGY_PREFETCH, + 'fair': SCHED_STRATEGY_FAIR, +} + +RESULT_MAXLEN = 128 + +Ack = namedtuple('Ack', ('id', 'fd', 'payload')) + + +def gen_not_started(gen): + # gi_frame is None when generator stopped. 
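+ # f_lasti stays -1 until the generator has executed its first
+ # bytecode instruction, i.e. next() has never been called on it.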
+ return gen.gi_frame and gen.gi_frame.f_lasti == -1 + + +def _get_job_writer(job): + try: + writer = job._writer + except AttributeError: + pass + else: + return writer() # is a weakref + + +def _select(readers=None, writers=None, err=None, timeout=0): + """Simple wrapper to :class:`~select.select`. + + :param readers: Set of reader fds to test if readable. + :param writers: Set of writer fds to test if writable. + :param err: Set of fds to test for error condition. + + All fd sets passed must be mutable as this function + will remove non-working fds from them, this also means + the caller must make sure there are still fds in the sets + before calling us again. + + :returns: tuple of ``(readable, writable, again)``, where + ``readable`` is a set of fds that have data available for read, + ``writable`` is a set of fds that is ready to be written to + and ``again`` is a flag that if set means the caller must + throw away the result and call us again. + + """ + readers = set() if readers is None else readers + writers = set() if writers is None else writers + err = set() if err is None else err + try: + r, w, e = select.select(readers, writers, err, timeout) + if e: + r = list(set(r) | set(e)) + return r, w, 0 + except (select.error, socket.error) as exc: + if get_errno(exc) == errno.EINTR: + return [], [], 1 + elif get_errno(exc) in SELECT_BAD_FD: + for fd in readers | writers | err: + try: + select.select([fd], [], [], 0) + except (select.error, socket.error) as exc: + if get_errno(exc) not in SELECT_BAD_FD: + raise + readers.discard(fd) + writers.discard(fd) + err.discard(fd) + return [], [], 1 + else: + raise + + +class Worker(_pool.Worker): + """Pool worker process.""" + dead = False + + def on_loop_start(self, pid): + # our version sends a WORKER_UP message when the process is ready + # to accept work, this will tell the parent that the inqueue fd + # is writable. 
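+ # (the parent routes WORKER_UP to AsynPool.on_process_alive, which
+ # adds this process' inqueue fd to the set of inqueues that jobs
+ # may be scheduled to).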
+ self.outq.put((WORKER_UP, (pid, ))) + + def prepare_result(self, result, RESULT_MAXLEN=RESULT_MAXLEN): + if not isinstance(result, ExceptionInfo): + return truncate(repr(result), RESULT_MAXLEN) + return result + + +class ResultHandler(_pool.ResultHandler): + """Handles messages from the pool processes.""" + + def __init__(self, *args, **kwargs): + self.fileno_to_outq = kwargs.pop('fileno_to_outq') + self.on_process_alive = kwargs.pop('on_process_alive') + super(ResultHandler, self).__init__(*args, **kwargs) + # add our custom message handler + self.state_handlers[WORKER_UP] = self.on_process_alive + + def _recv_message(self, add_reader, fd, callback, + __read__=__read__, readcanbuf=readcanbuf, + BytesIO=BytesIO, unpack_from=unpack_from, + load=_pickle.load): + Hr = Br = 0 + if readcanbuf: + buf = bytearray(4) + bufv = memoryview(buf) + else: + buf = bufv = BytesIO() + # header + + while Hr < 4: + try: + n = __read__( + fd, bufv[Hr:] if readcanbuf else bufv, 4 - Hr, + ) + except OSError as exc: + if get_errno(exc) not in UNAVAIL: + raise + yield + else: + if n == 0: + raise (OSError('End of file during message') if Hr + else EOFError()) + Hr += n + + body_size, = unpack_from('>i', bufv) + if readcanbuf: + buf = bytearray(body_size) + bufv = memoryview(buf) + else: + buf = bufv = BytesIO() + + while Br < body_size: + try: + n = __read__( + fd, bufv[Br:] if readcanbuf else bufv, body_size - Br, + ) + except OSError as exc: + if get_errno(exc) not in UNAVAIL: + raise + yield + else: + if n == 0: + raise (OSError('End of file during message') if Br + else EOFError()) + Br += n + add_reader(fd, self.handle_event, fd) + if readcanbuf: + message = load(BytesIO(bufv)) + else: + bufv.seek(0) + message = load(bufv) + if message: + callback(message) + + def _make_process_result(self, hub): + """Coroutine that reads messages from the pool processes + and calls the appropriate handler.""" + fileno_to_outq = self.fileno_to_outq + on_state_change = self.on_state_change + add_reader = hub.add_reader + remove_reader = hub.remove_reader + recv_message = self._recv_message + + def on_result_readable(fileno): + try: + fileno_to_outq[fileno] + except KeyError: # process gone + return remove_reader(fileno) + it = recv_message(add_reader, fileno, on_state_change) + try: + next(it) + except StopIteration: + pass + except (IOError, OSError, EOFError): + remove_reader(fileno) + else: + add_reader(fileno, it) + return on_result_readable + + def register_with_event_loop(self, hub): + self.handle_event = self._make_process_result(hub) + + def handle_event(self, fileno): + raise RuntimeError('Not registered with event loop') + + def on_stop_not_started(self): + """This method is always used to stop when the helper thread is not + started.""" + cache = self.cache + check_timeouts = self.check_timeouts + fileno_to_outq = self.fileno_to_outq + on_state_change = self.on_state_change + join_exited_workers = self.join_exited_workers + + # flush the processes outqueues until they have all terminated. + outqueues = set(fileno_to_outq) + while cache and outqueues and self._state != TERMINATE: + if check_timeouts is not None: + # make sure tasks with a time limit will time out. 
+ check_timeouts() + # cannot iterate and remove at the same time + pending_remove_fd = set() + for fd in outqueues: + self._flush_outqueue( + fd, pending_remove_fd.discard, fileno_to_outq, + on_state_change, + ) + try: + join_exited_workers(shutdown=True) + except WorkersJoined: + return debug('result handler: all workers terminated') + outqueues.difference_update(pending_remove_fd) + + def _flush_outqueue(self, fd, remove, process_index, on_state_change): + try: + proc = process_index[fd] + except KeyError: + # process already found terminated + # which means its outqueue has already been processed + # by the worker lost handler. + return remove(fd) + + reader = proc.outq._reader + try: + setblocking(reader, 1) + except (OSError, IOError): + return remove(fd) + try: + if reader.poll(0): + task = reader.recv() + else: + task = None + sleep(0.5) + except (IOError, EOFError): + return remove(fd) + else: + if task: + on_state_change(task) + finally: + try: + setblocking(reader, 0) + except (OSError, IOError): + return remove(fd) + + +class AsynPool(_pool.Pool): + """Pool version that uses AIO instead of helper threads.""" + ResultHandler = ResultHandler + Worker = Worker + + def __init__(self, processes=None, synack=False, + sched_strategy=None, *args, **kwargs): + self.sched_strategy = SCHED_STRATEGIES.get(sched_strategy, + sched_strategy) + processes = self.cpu_count() if processes is None else processes + self.synack = synack + # create queue-pairs for all our processes in advance. + self._queues = dict((self.create_process_queues(), None) + for _ in range(processes)) + + # inqueue fileno -> process mapping + self._fileno_to_inq = {} + # outqueue fileno -> process mapping + self._fileno_to_outq = {} + # synqueue fileno -> process mapping + self._fileno_to_synq = {} + + # We keep track of processes that have not yet + # sent a WORKER_UP message. If a process fails to send + # this message within proc_up_timeout we terminate it + # and hope the next process will recover. + self._proc_alive_timeout = PROC_ALIVE_TIMEOUT + self._waiting_to_start = set() + + # denormalized set of all inqueues. + self._all_inqueues = set() + + # Set of fds being written to (busy) + self._active_writes = set() + + # Set of active co-routines currently writing jobs. + self._active_writers = set() + + # Set of fds that are busy (executing task) + self._busy_workers = set() + self._mark_worker_as_available = self._busy_workers.discard + + # Holds jobs waiting to be written to child processes. + self.outbound_buffer = deque() + + self.write_stats = Counter() + + super(AsynPool, self).__init__(processes, *args, **kwargs) + + for proc in self._pool: + # create initial mappings, these will be updated + # as processes are recycled, or found lost elsewhere. + self._fileno_to_outq[proc.outqR_fd] = proc + self._fileno_to_synq[proc.synqW_fd] = proc + self.on_soft_timeout = self._timeout_handler.on_soft_timeout + self.on_hard_timeout = self._timeout_handler.on_hard_timeout + + def _event_process_exit(self, hub, fd): + # This method is called whenever the process sentinel is readable. 
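+ # (a readable sentinel means the child has exited, so maintain_pool
+ # can reap it and start a replacement process).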
+ hub.remove(fd) + self.maintain_pool() + + def register_with_event_loop(self, hub): + """Registers the async pool with the current event loop.""" + self._result_handler.register_with_event_loop(hub) + self.handle_result_event = self._result_handler.handle_event + self._create_timelimit_handlers(hub) + self._create_process_handlers(hub) + self._create_write_handlers(hub) + + # Add handler for when a process exits (calls maintain_pool) + [hub.add_reader(fd, self._event_process_exit, hub, fd) + for fd in self.process_sentinels] + # Handle_result_event is called whenever one of the + # result queues are readable. + [hub.add_reader(fd, self.handle_result_event, fd) + for fd in self._fileno_to_outq] + + # Timers include calling maintain_pool at a regular interval + # to be certain processes are restarted. + for handler, interval in items(self.timers): + hub.call_repeatedly(interval, handler) + + hub.on_tick.add(self.on_poll_start) + + def _create_timelimit_handlers(self, hub, now=time.time): + """For async pool this sets up the handlers used + to implement time limits.""" + call_later = hub.call_later + trefs = self._tref_for_id = WeakValueDictionary() + + def on_timeout_set(R, soft, hard): + if soft: + trefs[R._job] = call_later( + soft, self._on_soft_timeout, R._job, soft, hard, hub, + ) + elif hard: + trefs[R._job] = call_later( + hard, self._on_hard_timeout, R._job, + ) + self.on_timeout_set = on_timeout_set + + def _discard_tref(job): + try: + tref = trefs.pop(job) + tref.cancel() + del(tref) + except (KeyError, AttributeError): + pass # out of scope + self._discard_tref = _discard_tref + + def on_timeout_cancel(R): + _discard_tref(R._job) + self.on_timeout_cancel = on_timeout_cancel + + def _on_soft_timeout(self, job, soft, hard, hub, now=time.time): + # only used by async pool. + if hard: + self._tref_for_id[job] = hub.call_at( + now() + (hard - soft), self._on_hard_timeout, job, + ) + try: + result = self._cache[job] + except KeyError: + pass # job ready + else: + self.on_soft_timeout(result) + finally: + if not hard: + # remove tref + self._discard_tref(job) + + def _on_hard_timeout(self, job): + # only used by async pool. 
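+ # (the job may already have been removed from the cache if it
+ # finished just before the timer fired, hence the KeyError guard).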
+ try: + result = self._cache[job] + except KeyError: + pass # job ready + else: + self.on_hard_timeout(result) + finally: + # remove tref + self._discard_tref(job) + + def on_job_ready(self, job, i, obj, inqW_fd): + self._mark_worker_as_available(inqW_fd) + + def _create_process_handlers(self, hub, READ=READ, ERR=ERR): + """For async pool this will create the handlers called + when a process is up/down and etc.""" + add_reader, remove_reader, remove_writer = ( + hub.add_reader, hub.remove_reader, hub.remove_writer, + ) + cache = self._cache + all_inqueues = self._all_inqueues + fileno_to_inq = self._fileno_to_inq + fileno_to_outq = self._fileno_to_outq + fileno_to_synq = self._fileno_to_synq + busy_workers = self._busy_workers + event_process_exit = self._event_process_exit + handle_result_event = self.handle_result_event + process_flush_queues = self.process_flush_queues + waiting_to_start = self._waiting_to_start + + def verify_process_alive(proc): + if proc._is_alive() and proc in waiting_to_start: + assert proc.outqR_fd in fileno_to_outq + assert fileno_to_outq[proc.outqR_fd] is proc + assert proc.outqR_fd in hub.readers + error('Timed out waiting for UP message from %r', proc) + os.kill(proc.pid, 9) + + def on_process_up(proc): + """Called when a process has started.""" + # If we got the same fd as a previous process then we will also + # receive jobs in the old buffer, so we need to reset the + # job._write_to and job._scheduled_for attributes used to recover + # message boundaries when processes exit. + infd = proc.inqW_fd + for job in values(cache): + if job._write_to and job._write_to.inqW_fd == infd: + job._write_to = proc + if job._scheduled_for and job._scheduled_for.inqW_fd == infd: + job._scheduled_for = proc + fileno_to_outq[proc.outqR_fd] = proc + # maintain_pool is called whenever a process exits. + add_reader( + proc.sentinel, event_process_exit, hub, proc.sentinel, + ) + + assert not isblocking(proc.outq._reader) + + # handle_result_event is called when the processes outqueue is + # readable. + add_reader(proc.outqR_fd, handle_result_event, proc.outqR_fd) + + waiting_to_start.add(proc) + hub.call_later( + self._proc_alive_timeout, verify_process_alive, proc, + ) + + self.on_process_up = on_process_up + + def _remove_from_index(obj, proc, index, remove_fun, callback=None): + # this remove the file descriptors for a process from + # the indices. we have to make sure we don't overwrite + # another processes fds, as the fds may be reused. + try: + fd = obj.fileno() + except (IOError, OSError): + return + + try: + if index[fd] is proc: + # fd has not been reused so we can remove it from index. 
+ index.pop(fd, None) + except KeyError: + pass + else: + remove_fun(fd) + if callback is not None: + callback(fd) + return fd + + def on_process_down(proc): + """Called when a worker process exits.""" + if proc.dead: + return + process_flush_queues(proc) + _remove_from_index( + proc.outq._reader, proc, fileno_to_outq, remove_reader, + ) + if proc.synq: + _remove_from_index( + proc.synq._writer, proc, fileno_to_synq, remove_writer, + ) + inq = _remove_from_index( + proc.inq._writer, proc, fileno_to_inq, remove_writer, + callback=all_inqueues.discard, + ) + if inq: + busy_workers.discard(inq) + remove_reader(proc.sentinel) + waiting_to_start.discard(proc) + self._active_writes.discard(proc.inqW_fd) + remove_writer(proc.inqW_fd) + remove_reader(proc.outqR_fd) + if proc.synqR_fd: + remove_reader(proc.synqR_fd) + if proc.synqW_fd: + self._active_writes.discard(proc.synqW_fd) + remove_reader(proc.synqW_fd) + self.on_process_down = on_process_down + + def _create_write_handlers(self, hub, + pack=struct.pack, dumps=_pickle.dumps, + protocol=HIGHEST_PROTOCOL): + """For async pool this creates the handlers used to write data to + child processes.""" + fileno_to_inq = self._fileno_to_inq + fileno_to_synq = self._fileno_to_synq + outbound = self.outbound_buffer + pop_message = outbound.popleft + put_message = outbound.append + all_inqueues = self._all_inqueues + active_writes = self._active_writes + active_writers = self._active_writers + busy_workers = self._busy_workers + diff = all_inqueues.difference + add_writer = hub.add_writer + hub_add, hub_remove = hub.add, hub.remove + mark_write_fd_as_active = active_writes.add + mark_write_gen_as_active = active_writers.add + mark_worker_as_busy = busy_workers.add + write_generator_done = active_writers.discard + get_job = self._cache.__getitem__ + write_stats = self.write_stats + is_fair_strategy = self.sched_strategy == SCHED_STRATEGY_FAIR + revoked_tasks = worker_state.revoked + getpid = os.getpid + + precalc = {ACK: self._create_payload(ACK, (0, )), + NACK: self._create_payload(NACK, (0, ))} + + def _put_back(job, _time=time.time): + # puts back at the end of the queue + if job._terminated is not None or \ + job.correlation_id in revoked_tasks: + if not job._accepted: + job._ack(None, _time(), getpid(), None) + job._set_terminated(job._terminated) + else: + # XXX linear lookup, should find a better way, + # but this happens rarely and is here to protect against races. + if job not in outbound: + outbound.appendleft(job) + self._put_back = _put_back + + # called for every event loop iteration, and if there + # are messages pending this will schedule writing one message + # by registering the 'schedule_writes' function for all currently + # inactive inqueues (not already being written to) + + # consolidate means the event loop will merge them + # and call the callback once with the list writable fds as + # argument. Using this means we minimize the risk of having + # the same fd receive every task if the pipe read buffer is not + # full. 
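+ # With the fair strategy writes are also withheld while every worker
+ # is busy, so a process still executing a task will not get another
+ # job buffered onto its pipe.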
+ if is_fair_strategy: + + def on_poll_start(): + if outbound and len(busy_workers) < len(all_inqueues): + inactive = diff(active_writes) + [hub_add(fd, None, WRITE | ERR, consolidate=True) + for fd in inactive] + else: + [hub_remove(fd) for fd in diff(active_writes)] + else: + def on_poll_start(): # noqa + if outbound: + [hub_add(fd, None, WRITE | ERR, consolidate=True) + for fd in diff(active_writes)] + else: + [hub_remove(fd) for fd in diff(active_writes)] + self.on_poll_start = on_poll_start + + def on_inqueue_close(fd, proc): + # Makes sure the fd is removed from tracking when + # the connection is closed, this is essential as fds may be reused. + busy_workers.discard(fd) + try: + if fileno_to_inq[fd] is proc: + fileno_to_inq.pop(fd, None) + active_writes.discard(fd) + all_inqueues.discard(fd) + hub_remove(fd) + except KeyError: + pass + self.on_inqueue_close = on_inqueue_close + + def schedule_writes(ready_fds, curindex=[0]): + # Schedule write operation to ready file descriptor. + # The file descriptor is writeable, but that does not + # mean the process is currently reading from the socket. + # The socket is buffered so writeable simply means that + # the buffer can accept at least 1 byte of data. + + # This means we have to cycle between the ready fds. + # the first version used shuffle, but using i % total + # is about 30% faster with many processes. The latter + # also shows more fairness in write stats when used with + # many processes [XXX On OS X, this may vary depending + # on event loop implementation (i.e select vs epoll), so + # have to test further] + total = len(ready_fds) + + for i in range(total): + ready_fd = ready_fds[curindex[0] % total] + curindex[0] += 1 + if ready_fd in active_writes: + # already writing to this fd + continue + if is_fair_strategy and ready_fd in busy_workers: + # worker is already busy with another task + continue + if ready_fd not in all_inqueues: + hub_remove(ready_fd) + continue + try: + job = pop_message() + except IndexError: + # no more messages, remove all inactive fds from the hub. + # this is important since the fds are always writeable + # as long as there's 1 byte left in the buffer, and so + # this may create a spinloop where the event loop + # always wakes up. + for inqfd in diff(active_writes): + hub_remove(inqfd) + break + + else: + if not job._accepted: # job not accepted by another worker + try: + # keep track of what process the write operation + # was scheduled for. + proc = job._scheduled_for = fileno_to_inq[ready_fd] + except KeyError: + # write was scheduled for this fd but the process + # has since exited and the message must be sent to + # another process. + put_message(job) + continue + cor = _write_job(proc, ready_fd, job) + job._writer = ref(cor) + mark_write_gen_as_active(cor) + mark_write_fd_as_active(ready_fd) + mark_worker_as_busy(ready_fd) + + # Try to write immediately, in case there's an error. + try: + next(cor) + except StopIteration: + pass + except OSError as exc: + if get_errno(exc) != errno.EBADF: + raise + else: + add_writer(ready_fd, cor) + hub.consolidate_callback = schedule_writes + + def send_job(tup): + # Schedule writing job request for when one of the process + # inqueues are writable. + body = dumps(tup, protocol=protocol) + body_size = len(body) + header = pack('>I', body_size) + # index 1,0 is the job ID. 
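+ # get_job looks it up in Pool._cache so the serialized payload
+ # (4 byte big-endian length header plus pickled body) can be attached
+ # to the job and written once one of the inqueues becomes writable.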
+ job = get_job(tup[1][0]) + job._payload = buf_t(header), buf_t(body), body_size + put_message(job) + self._quick_put = send_job + + def on_not_recovering(proc, fd, job): + error('Process inqueue damaged: %r %r' % (proc, proc.exitcode)) + if proc._is_alive(): + proc.terminate() + hub.remove(fd) + self._put_back(job) + + def _write_job(proc, fd, job): + # writes job to the worker process. + # Operation must complete if more than one byte of data + # was written. If the broker connection is lost + # and no data was written the operation shall be cancelled. + header, body, body_size = job._payload + errors = 0 + try: + # job result keeps track of what process the job is sent to. + job._write_to = proc + send = proc.send_job_offset + + Hw = Bw = 0 + # write header + while Hw < 4: + try: + Hw += send(header, Hw) + except Exception as exc: + if get_errno(exc) not in UNAVAIL: + raise + # suspend until more data + errors += 1 + if errors > 100: + on_not_recovering(proc, fd, job) + raise StopIteration() + yield + else: + errors = 0 + + # write body + while Bw < body_size: + try: + Bw += send(body, Bw) + except Exception as exc: + if get_errno(exc) not in UNAVAIL: + raise + # suspend until more data + errors += 1 + if errors > 100: + on_not_recovering(proc, fd, job) + raise StopIteration() + yield + else: + errors = 0 + finally: + hub_remove(fd) + write_stats[proc.index] += 1 + # message written, so this fd is now available + active_writes.discard(fd) + write_generator_done(job._writer()) # is a weakref + + def send_ack(response, pid, job, fd, WRITE=WRITE, ERR=ERR): + # Only used when synack is enabled. + # Schedule writing ack response for when the fd is writeable. + msg = Ack(job, fd, precalc[response]) + callback = promise(write_generator_done) + cor = _write_ack(fd, msg, callback=callback) + mark_write_gen_as_active(cor) + mark_write_fd_as_active(fd) + callback.args = (cor, ) + add_writer(fd, cor) + self.send_ack = send_ack + + def _write_ack(fd, ack, callback=None): + # writes ack back to the worker if synack enabled. + # this operation *MUST* complete, otherwise + # the worker process will hang waiting for the ack. + header, body, body_size = ack[2] + try: + try: + proc = fileno_to_synq[fd] + except KeyError: + # process died, we can safely discard the ack at this + # point. + raise StopIteration() + send = proc.send_syn_offset + + Hw = Bw = 0 + # write header + while Hw < 4: + try: + Hw += send(header, Hw) + except Exception as exc: + if get_errno(exc) not in UNAVAIL: + raise + yield + + # write body + while Bw < body_size: + try: + Bw += send(body, Bw) + except Exception as exc: + if get_errno(exc) not in UNAVAIL: + raise + # suspend until more data + yield + finally: + if callback: + callback() + # message written, so this fd is now available + active_writes.discard(fd) + + def flush(self): + if self._state == TERMINATE: + return + # cancel all tasks that have not been accepted so that NACK is sent. + for job in values(self._cache): + if not job._accepted: + job._cancel() + + # clear the outgoing buffer as the tasks will be redelivered by + # the broker anyway. + if self.outbound_buffer: + self.outbound_buffer.clear() + + self.maintain_pool() + + try: + # ...but we must continue writing the payloads we already started + # to keep message boundaries. + # The messages may be NACK'ed later if synack is enabled. 
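+ # (abandoning a half-written message would corrupt the framing:
+ # the child would read the remaining bytes as part of the next
+ # length header/body).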
+ if self._state == RUN: + # flush outgoing buffers + intervals = fxrange(0.01, 0.1, 0.01, repeatlast=True) + owned_by = {} + for job in values(self._cache): + writer = _get_job_writer(job) + if writer is not None: + owned_by[writer] = job + + while self._active_writers: + writers = list(self._active_writers) + for gen in writers: + if (gen.__name__ == '_write_job' and + gen_not_started(gen)): + # has not started writing the job so can + # discard the task, but we must also remove + # it from the Pool._cache. + try: + job = owned_by[gen] + except KeyError: + pass + else: + # removes from Pool._cache + job.discard() + self._active_writers.discard(gen) + else: + try: + job = owned_by[gen] + except KeyError: + pass + else: + job_proc = job._write_to + if job_proc._is_alive(): + self._flush_writer(job_proc, gen) + # workers may have exited in the meantime. + self.maintain_pool() + sleep(next(intervals)) # don't busyloop + finally: + self.outbound_buffer.clear() + self._active_writers.clear() + self._active_writes.clear() + self._busy_workers.clear() + + def _flush_writer(self, proc, writer): + fds = set([proc.inq._writer]) + try: + while fds: + if not proc._is_alive(): + break # process exited + readable, writable, again = _select( + writers=fds, err=fds, timeout=0.5, + ) + if not again and (writable or readable): + try: + next(writer) + except (StopIteration, OSError, IOError, EOFError): + break + finally: + self._active_writers.discard(writer) + + def get_process_queues(self): + """Get queues for a new process. + + Here we will find an unused slot, as there should always + be one available when we start a new process. + """ + return next(q for q, owner in items(self._queues) + if owner is None) + + def on_grow(self, n): + """Grow the pool by ``n`` proceses.""" + diff = max(self._processes - len(self._queues), 0) + if diff: + self._queues.update( + dict((self.create_process_queues(), None) for _ in range(diff)) + ) + + def on_shrink(self, n): + """Shrink the pool by ``n`` processes.""" + pass + + def create_process_queues(self): + """Creates new in, out (and optionally syn) queues, + returned as a tuple.""" + # NOTE: Pipes must be set O_NONBLOCK at creation time (the original + # fd), otherwise it will not be possible to change the flags until + # there is an actual reader/writer on the other side. 
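+ # The parent only writes to inq and reads from outq, so those are
+ # the ends created non-blocking here (see the asserts below).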
+ inq = _SimpleQueue(wnonblock=True) + outq = _SimpleQueue(rnonblock=True) + synq = None + assert isblocking(inq._reader) + assert not isblocking(inq._writer) + assert not isblocking(outq._reader) + assert isblocking(outq._writer) + if self.synack: + synq = _SimpleQueue(wnonblock=True) + assert isblocking(synq._reader) + assert not isblocking(synq._writer) + return inq, outq, synq + + def on_process_alive(self, pid): + """Handler called when the :const:`WORKER_UP` message is received + from a child process, which marks the process as ready + to receive work.""" + try: + proc = next(w for w in self._pool if w.pid == pid) + except StopIteration: + return logger.warning('process with pid=%s already exited', pid) + assert proc.inqW_fd not in self._fileno_to_inq + assert proc.inqW_fd not in self._all_inqueues + self._waiting_to_start.discard(proc) + self._fileno_to_inq[proc.inqW_fd] = proc + self._fileno_to_synq[proc.synqW_fd] = proc + self._all_inqueues.add(proc.inqW_fd) + + def on_job_process_down(self, job, pid_gone): + """Handler called for each job when the process it was assigned to + exits.""" + if job._write_to and not job._write_to._is_alive(): + # job was partially written + self.on_partial_read(job, job._write_to) + elif job._scheduled_for and not job._scheduled_for._is_alive(): + # job was only scheduled to be written to this process, + # but no data was sent so put it back on the outbound_buffer. + self._put_back(job) + + def on_job_process_lost(self, job, pid, exitcode): + """Handler called for each *started* job when the process it + was assigned to exited by mysterious means (error exitcodes and + signals)""" + self.mark_as_worker_lost(job, exitcode) + + def human_write_stats(self): + if self.write_stats is None: + return 'N/A' + vals = list(values(self.write_stats)) + total = sum(vals) + + def per(v, total): + return '{0:.2f}%'.format((float(v) / total) * 100.0 if v else 0) + + return { + 'total': total, + 'avg': per(total / len(self.write_stats) if total else 0, total), + 'all': ', '.join(per(v, total) for v in vals), + 'raw': ', '.join(map(str, vals)), + 'inqueues': { + 'total': len(self._all_inqueues), + 'active': len(self._active_writes), + } + } + + def _process_cleanup_queues(self, proc): + """Handler called to clean up a processes queues after process + exit.""" + if not proc.dead: + try: + self._queues[self._find_worker_queues(proc)] = None + except (KeyError, ValueError): + pass + + @staticmethod + def _stop_task_handler(task_handler): + """Called at shutdown to tell processes that we are shutting down.""" + for proc in task_handler.pool: + try: + setblocking(proc.inq._writer, 1) + except (OSError, IOError): + pass + else: + try: + proc.inq.put(None) + except OSError as exc: + if get_errno(exc) != errno.EBADF: + raise + + def create_result_handler(self): + return super(AsynPool, self).create_result_handler( + fileno_to_outq=self._fileno_to_outq, + on_process_alive=self.on_process_alive, + ) + + def _process_register_queues(self, proc, queues): + """Marks new ownership for ``queues`` so that the fileno indices are + updated.""" + assert queues in self._queues + b = len(self._queues) + self._queues[queues] = proc + assert b == len(self._queues) + + def _find_worker_queues(self, proc): + """Find the queues owned by ``proc``.""" + try: + return next(q for q, owner in items(self._queues) + if owner == proc) + except StopIteration: + raise ValueError(proc) + + def _setup_queues(self): + # this is only used by the original pool which uses a shared + # queue for all processes. 
+ + # these attributes makes no sense for us, but we will still + # have to initialize them. + self._inqueue = self._outqueue = \ + self._quick_put = self._quick_get = self._poll_result = None + + def process_flush_queues(self, proc): + """Flushes all queues, including the outbound buffer, so that + all tasks that have not been started will be discarded. + + In Celery this is called whenever the transport connection is lost + (consumer restart). + + """ + resq = proc.outq._reader + on_state_change = self._result_handler.on_state_change + fds = set([resq]) + while fds and not resq.closed and self._state != TERMINATE: + readable, _, again = _select(fds, None, fds, timeout=0.01) + if readable: + try: + task = resq.recv() + except (OSError, IOError, EOFError) as exc: + if get_errno(exc) == errno.EINTR: + continue + elif get_errno(exc) == errno.EAGAIN: + break + else: + debug('got %r while flushing process %r', + exc, proc, exc_info=1) + if get_errno(exc) not in UNAVAIL: + debug('got %r while flushing process %r', + exc, proc, exc_info=1) + break + else: + if task is None: + debug('got sentinel while flushing process %r', proc) + break + else: + on_state_change(task) + else: + break + + def on_partial_read(self, job, proc): + """Called when a job was only partially written to a child process + and it exited.""" + # worker terminated by signal: + # we cannot reuse the sockets again, because we don't know if + # the process wrote/read anything frmo them, and if so we cannot + # restore the message boundaries. + if not job._accepted: + # job was not acked, so find another worker to send it to. + self._put_back(job) + writer = _get_job_writer(job) + if writer: + self._active_writers.discard(writer) + del(writer) + + if not proc.dead: + proc.dead = True + # Replace queues to avoid reuse + before = len(self._queues) + try: + queues = self._find_worker_queues(proc) + if self.destroy_queues(queues, proc): + self._queues[self.create_process_queues()] = None + except ValueError: + pass + assert len(self._queues) == before + + def destroy_queues(self, queues, proc): + """Destroy queues that can no longer be used, so that they + be replaced by new sockets.""" + assert not proc._is_alive() + self._waiting_to_start.discard(proc) + removed = 1 + try: + self._queues.pop(queues) + except KeyError: + removed = 0 + try: + self.on_inqueue_close(queues[0]._writer.fileno(), proc) + except IOError: + pass + for queue in queues: + if queue: + for sock in (queue._reader, queue._writer): + if not sock.closed: + try: + sock.close() + except (IOError, OSError): + pass + return removed + + def _create_payload(self, type_, args, + dumps=_pickle.dumps, pack=struct.pack, + protocol=HIGHEST_PROTOCOL): + body = dumps((type_, args), protocol=protocol) + size = len(body) + header = pack('>I', size) + return header, body, size + + @classmethod + def _set_result_sentinel(cls, _outqueue, _pool): + # unused + pass + + def _help_stuff_finish_args(self): + # Pool._help_stuff_finished is a classmethod so we have to use this + # trick to modify the arguments passed to it. 
+ return (self._pool, ) + + @classmethod + def _help_stuff_finish(cls, pool): + debug( + 'removing tasks from inqueue until task handler finished', + ) + fileno_to_proc = {} + inqR = set() + for w in pool: + try: + fd = w.inq._reader.fileno() + inqR.add(fd) + fileno_to_proc[fd] = w + except IOError: + pass + while inqR: + readable, _, again = _select(inqR, timeout=0.5) + if again: + continue + if not readable: + break + for fd in readable: + fileno_to_proc[fd].inq._reader.recv() + sleep(0) + + @property + def timers(self): + return {self.maintain_pool: 5.0} diff --git a/celery/concurrency/base.py b/celery/concurrency/base.py new file mode 100644 index 0000000..29c348d --- /dev/null +++ b/celery/concurrency/base.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +""" + celery.concurrency.base + ~~~~~~~~~~~~~~~~~~~~~~~ + + TaskPool interface. + +""" +from __future__ import absolute_import + +import logging +import os +import sys + +from billiard.einfo import ExceptionInfo +from billiard.exceptions import WorkerLostError +from kombu.utils.encoding import safe_repr + +from celery.exceptions import WorkerShutdown, WorkerTerminate +from celery.five import monotonic, reraise +from celery.utils import timer2 +from celery.utils.text import truncate +from celery.utils.log import get_logger + +__all__ = ['BasePool', 'apply_target'] + +logger = get_logger('celery.pool') + + +def apply_target(target, args=(), kwargs={}, callback=None, + accept_callback=None, pid=None, getpid=os.getpid, + propagate=(), monotonic=monotonic, **_): + if accept_callback: + accept_callback(pid or getpid(), monotonic()) + try: + ret = target(*args, **kwargs) + except propagate: + raise + except Exception: + raise + except (WorkerShutdown, WorkerTerminate): + raise + except BaseException as exc: + try: + reraise(WorkerLostError, WorkerLostError(repr(exc)), + sys.exc_info()[2]) + except WorkerLostError: + callback(ExceptionInfo()) + else: + callback(ret) + + +class BasePool(object): + RUN = 0x1 + CLOSE = 0x2 + TERMINATE = 0x3 + + Timer = timer2.Timer + + #: set to true if the pool can be shutdown from within + #: a signal handler. + signal_safe = True + + #: set to true if pool uses greenlets. 
+ is_green = False + + _state = None + _pool = None + + #: only used by multiprocessing pool + uses_semaphore = False + + task_join_will_block = True + + def __init__(self, limit=None, putlocks=True, + forking_enable=True, callbacks_propagate=(), **options): + self.limit = limit + self.putlocks = putlocks + self.options = options + self.forking_enable = forking_enable + self.callbacks_propagate = callbacks_propagate + self._does_debug = logger.isEnabledFor(logging.DEBUG) + + def on_start(self): + pass + + def did_start_ok(self): + return True + + def flush(self): + pass + + def on_stop(self): + pass + + def register_with_event_loop(self, loop): + pass + + def on_apply(self, *args, **kwargs): + pass + + def on_terminate(self): + pass + + def on_soft_timeout(self, job): + pass + + def on_hard_timeout(self, job): + pass + + def maintain_pool(self, *args, **kwargs): + pass + + def terminate_job(self, pid, signal=None): + raise NotImplementedError( + '{0} does not implement kill_job'.format(type(self))) + + def restart(self): + raise NotImplementedError( + '{0} does not implement restart'.format(type(self))) + + def stop(self): + self.on_stop() + self._state = self.TERMINATE + + def terminate(self): + self._state = self.TERMINATE + self.on_terminate() + + def start(self): + self.on_start() + self._state = self.RUN + + def close(self): + self._state = self.CLOSE + self.on_close() + + def on_close(self): + pass + + def apply_async(self, target, args=[], kwargs={}, **options): + """Equivalent of the :func:`apply` built-in function. + + Callbacks should optimally return as soon as possible since + otherwise the thread which handles the result will get blocked. + + """ + if self._does_debug: + logger.debug('TaskPool: Apply %s (args:%s kwargs:%s)', + target, truncate(safe_repr(args), 1024), + truncate(safe_repr(kwargs), 1024)) + + return self.on_apply(target, args, kwargs, + waitforslot=self.putlocks, + callbacks_propagate=self.callbacks_propagate, + **options) + + def _get_info(self): + return {} + + @property + def info(self): + return self._get_info() + + @property + def active(self): + return self._state == self.RUN + + @property + def num_processes(self): + return self.limit diff --git a/celery/concurrency/eventlet.py b/celery/concurrency/eventlet.py new file mode 100644 index 0000000..00082dd --- /dev/null +++ b/celery/concurrency/eventlet.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- +""" + celery.concurrency.eventlet + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Eventlet pool implementation. + +""" +from __future__ import absolute_import + +import sys + +from time import time + +__all__ = ['TaskPool'] + +W_RACE = """\ +Celery module with %s imported before eventlet patched\ +""" +RACE_MODS = ('billiard.', 'celery.', 'kombu.') + + +#: Warn if we couldn't patch early enough, +#: and thread/socket depending celery modules have already been loaded. +for mod in (mod for mod in sys.modules if mod.startswith(RACE_MODS)): + for side in ('thread', 'threading', 'socket'): # pragma: no cover + if getattr(mod, side, None): + import warnings + warnings.warn(RuntimeWarning(W_RACE % side)) + + +from celery import signals +from celery.utils import timer2 + +from . 
import base + + +def apply_target(target, args=(), kwargs={}, callback=None, + accept_callback=None, getpid=None): + return base.apply_target(target, args, kwargs, callback, accept_callback, + pid=getpid()) + + +class Schedule(timer2.Schedule): + + def __init__(self, *args, **kwargs): + from eventlet.greenthread import spawn_after + from greenlet import GreenletExit + super(Schedule, self).__init__(*args, **kwargs) + + self.GreenletExit = GreenletExit + self._spawn_after = spawn_after + self._queue = set() + + def _enter(self, eta, priority, entry): + secs = max(eta - time(), 0) + g = self._spawn_after(secs, entry) + self._queue.add(g) + g.link(self._entry_exit, entry) + g.entry = entry + g.eta = eta + g.priority = priority + g.cancelled = False + return g + + def _entry_exit(self, g, entry): + try: + try: + g.wait() + except self.GreenletExit: + entry.cancel() + g.cancelled = True + finally: + self._queue.discard(g) + + def clear(self): + queue = self._queue + while queue: + try: + queue.pop().cancel() + except (KeyError, self.GreenletExit): + pass + + @property + def queue(self): + return self._queue + + +class Timer(timer2.Timer): + Schedule = Schedule + + def ensure_started(self): + pass + + def stop(self): + self.schedule.clear() + + def cancel(self, tref): + try: + tref.cancel() + except self.schedule.GreenletExit: + pass + + def start(self): + pass + + +class TaskPool(base.BasePool): + Timer = Timer + + signal_safe = False + is_green = True + task_join_will_block = False + + def __init__(self, *args, **kwargs): + from eventlet import greenthread + from eventlet.greenpool import GreenPool + self.Pool = GreenPool + self.getcurrent = greenthread.getcurrent + self.getpid = lambda: id(greenthread.getcurrent()) + self.spawn_n = greenthread.spawn_n + + super(TaskPool, self).__init__(*args, **kwargs) + + def on_start(self): + self._pool = self.Pool(self.limit) + signals.eventlet_pool_started.send(sender=self) + self._quick_put = self._pool.spawn_n + self._quick_apply_sig = signals.eventlet_pool_apply.send + + def on_stop(self): + signals.eventlet_pool_preshutdown.send(sender=self) + if self._pool is not None: + self._pool.waitall() + signals.eventlet_pool_postshutdown.send(sender=self) + + def on_apply(self, target, args=None, kwargs=None, callback=None, + accept_callback=None, **_): + self._quick_apply_sig( + sender=self, target=target, args=args, kwargs=kwargs, + ) + self._quick_put(apply_target, target, args, kwargs, + callback, accept_callback, + self.getpid) + + def grow(self, n=1): + limit = self.limit + n + self._pool.resize(limit) + self.limit = limit + + def shrink(self, n=1): + limit = self.limit - n + self._pool.resize(limit) + self.limit = limit + + def _get_info(self): + return { + 'max-concurrency': self.limit, + 'free-threads': self._pool.free(), + 'running-threads': self._pool.running(), + } diff --git a/celery/concurrency/gevent.py b/celery/concurrency/gevent.py new file mode 100644 index 0000000..f89de92 --- /dev/null +++ b/celery/concurrency/gevent.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- +""" + celery.concurrency.gevent + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + gevent pool implementation. 
+ +""" +from __future__ import absolute_import + +from time import time + +try: + from gevent import Timeout +except ImportError: # pragma: no cover + Timeout = None # noqa + +from celery.utils import timer2 + +from .base import apply_target, BasePool + +__all__ = ['TaskPool'] + + +def apply_timeout(target, args=(), kwargs={}, callback=None, + accept_callback=None, pid=None, timeout=None, + timeout_callback=None, Timeout=Timeout, + apply_target=apply_target, **rest): + try: + with Timeout(timeout): + return apply_target(target, args, kwargs, callback, + accept_callback, pid, + propagate=(Timeout, ), **rest) + except Timeout: + return timeout_callback(False, timeout) + + +class Schedule(timer2.Schedule): + + def __init__(self, *args, **kwargs): + from gevent.greenlet import Greenlet, GreenletExit + + class _Greenlet(Greenlet): + cancel = Greenlet.kill + + self._Greenlet = _Greenlet + self._GreenletExit = GreenletExit + super(Schedule, self).__init__(*args, **kwargs) + self._queue = set() + + def _enter(self, eta, priority, entry): + secs = max(eta - time(), 0) + g = self._Greenlet.spawn_later(secs, entry) + self._queue.add(g) + g.link(self._entry_exit) + g.entry = entry + g.eta = eta + g.priority = priority + g.cancelled = False + return g + + def _entry_exit(self, g): + try: + g.kill() + finally: + self._queue.discard(g) + + def clear(self): + queue = self._queue + while queue: + try: + queue.pop().kill() + except KeyError: + pass + + @property + def queue(self): + return self._queue + + +class Timer(timer2.Timer): + Schedule = Schedule + + def ensure_started(self): + pass + + def stop(self): + self.schedule.clear() + + def start(self): + pass + + +class TaskPool(BasePool): + Timer = Timer + + signal_safe = False + is_green = True + task_join_will_block = False + + def __init__(self, *args, **kwargs): + from gevent import spawn_raw + from gevent.pool import Pool + self.Pool = Pool + self.spawn_n = spawn_raw + self.timeout = kwargs.get('timeout') + super(TaskPool, self).__init__(*args, **kwargs) + + def on_start(self): + self._pool = self.Pool(self.limit) + self._quick_put = self._pool.spawn + + def on_stop(self): + if self._pool is not None: + self._pool.join() + + def on_apply(self, target, args=None, kwargs=None, callback=None, + accept_callback=None, timeout=None, + timeout_callback=None, **_): + timeout = self.timeout if timeout is None else timeout + return self._quick_put(apply_timeout if timeout else apply_target, + target, args, kwargs, callback, accept_callback, + timeout=timeout, + timeout_callback=timeout_callback) + + def grow(self, n=1): + self._pool._semaphore.counter += n + self._pool.size += n + + def shrink(self, n=1): + self._pool._semaphore.counter -= n + self._pool.size -= n + + @property + def num_processes(self): + return len(self._pool) diff --git a/celery/concurrency/prefork.py b/celery/concurrency/prefork.py new file mode 100644 index 0000000..b579d0e --- /dev/null +++ b/celery/concurrency/prefork.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- +""" + celery.concurrency.prefork + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Pool implementation using :mod:`multiprocessing`. 
+ +""" +from __future__ import absolute_import + +import os + +from billiard import forking_enable +from billiard.pool import RUN, CLOSE, Pool as BlockingPool + +from celery import platforms +from celery import signals +from celery._state import set_default_app, _set_task_join_will_block +from celery.app import trace +from celery.concurrency.base import BasePool +from celery.five import items +from celery.utils.functional import noop +from celery.utils.log import get_logger + +from .asynpool import AsynPool + +__all__ = ['TaskPool', 'process_initializer', 'process_destructor'] + +#: List of signals to reset when a child process starts. +WORKER_SIGRESET = frozenset(['SIGTERM', + 'SIGHUP', + 'SIGTTIN', + 'SIGTTOU', + 'SIGUSR1']) + +#: List of signals to ignore when a child process starts. +WORKER_SIGIGNORE = frozenset(['SIGINT']) + +logger = get_logger(__name__) +warning, debug = logger.warning, logger.debug + + +def process_initializer(app, hostname): + """Pool child process initializer. + + This will initialize a child pool process to ensure the correct + app instance is used and things like + logging works. + + """ + _set_task_join_will_block(True) + platforms.signals.reset(*WORKER_SIGRESET) + platforms.signals.ignore(*WORKER_SIGIGNORE) + platforms.set_mp_process_title('celeryd', hostname=hostname) + # This is for Windows and other platforms not supporting + # fork(). Note that init_worker makes sure it's only + # run once per process. + app.loader.init_worker() + app.loader.init_worker_process() + logfile = os.environ.get('CELERY_LOG_FILE') or None + if logfile and '%i' in logfile.lower(): + # logfile path will differ so need to set up logging again. + app.log.already_setup = False + app.log.setup(int(os.environ.get('CELERY_LOG_LEVEL', 0) or 0), + logfile, + bool(os.environ.get('CELERY_LOG_REDIRECT', False)), + str(os.environ.get('CELERY_LOG_REDIRECT_LEVEL')), + hostname=hostname) + if os.environ.get('FORKED_BY_MULTIPROCESSING'): + # pool did execv after fork + trace.setup_worker_optimizations(app) + else: + app.set_current() + set_default_app(app) + app.finalize() + trace._tasks = app._tasks # enables fast_trace_task optimization. + # rebuild execution handler for all tasks. + from celery.app.trace import build_tracer + for name, task in items(app.tasks): + task.__trace__ = build_tracer(name, task, app.loader, hostname, + app=app) + signals.worker_process_init.send(sender=None) + + +def process_destructor(pid, exitcode): + """Pool child process destructor + + Dispatch the :signal:`worker_process_shutdown` signal. + + """ + signals.worker_process_shutdown.send( + sender=None, pid=pid, exitcode=exitcode, + ) + + +class TaskPool(BasePool): + """Multiprocessing Pool implementation.""" + Pool = AsynPool + BlockingPool = BlockingPool + + uses_semaphore = True + write_stats = None + + def on_start(self): + """Run the task pool. + + Will pre-fork all workers so they're ready to accept tasks. 
+ + """ + forking_enable(self.forking_enable) + Pool = (self.BlockingPool if self.options.get('threads', True) + else self.Pool) + P = self._pool = Pool(processes=self.limit, + initializer=process_initializer, + on_process_exit=process_destructor, + synack=False, + **self.options) + + # Create proxy methods + self.on_apply = P.apply_async + self.maintain_pool = P.maintain_pool + self.terminate_job = P.terminate_job + self.grow = P.grow + self.shrink = P.shrink + self.flush = getattr(P, 'flush', None) # FIXME add to billiard + + def restart(self): + self._pool.restart() + self._pool.apply_async(noop) + + def did_start_ok(self): + return self._pool.did_start_ok() + + def register_with_event_loop(self, loop): + try: + reg = self._pool.register_with_event_loop + except AttributeError: + return + return reg(loop) + + def on_stop(self): + """Gracefully stop the pool.""" + if self._pool is not None and self._pool._state in (RUN, CLOSE): + self._pool.close() + self._pool.join() + self._pool = None + + def on_terminate(self): + """Force terminate the pool.""" + if self._pool is not None: + self._pool.terminate() + self._pool = None + + def on_close(self): + if self._pool is not None and self._pool._state == RUN: + self._pool.close() + + def _get_info(self): + try: + write_stats = self._pool.human_write_stats + except AttributeError: + write_stats = lambda: 'N/A' # only supported by asynpool + return { + 'max-concurrency': self.limit, + 'processes': [p.pid for p in self._pool._pool], + 'max-tasks-per-child': self._pool._maxtasksperchild or 'N/A', + 'put-guarded-by-semaphore': self.putlocks, + 'timeouts': (self._pool.soft_timeout or 0, + self._pool.timeout or 0), + 'writes': write_stats() + } + + @property + def num_processes(self): + return self._pool._processes diff --git a/celery/concurrency/solo.py b/celery/concurrency/solo.py new file mode 100644 index 0000000..a2dc199 --- /dev/null +++ b/celery/concurrency/solo.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +""" + celery.concurrency.solo + ~~~~~~~~~~~~~~~~~~~~~~~ + + Single-threaded pool implementation. + +""" +from __future__ import absolute_import + +import os + +from .base import BasePool, apply_target + +__all__ = ['TaskPool'] + + +class TaskPool(BasePool): + """Solo task pool (blocking, inline, fast).""" + + def __init__(self, *args, **kwargs): + super(TaskPool, self).__init__(*args, **kwargs) + self.on_apply = apply_target + + def _get_info(self): + return {'max-concurrency': 1, + 'processes': [os.getpid()], + 'max-tasks-per-child': None, + 'put-guarded-by-semaphore': True, + 'timeouts': ()} diff --git a/celery/concurrency/threads.py b/celery/concurrency/threads.py new file mode 100644 index 0000000..fee901e --- /dev/null +++ b/celery/concurrency/threads.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +""" + celery.concurrency.threads + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Pool implementation using threads. 
+ +""" +from __future__ import absolute_import + +from celery.five import UserDict + +from .base import apply_target, BasePool + +__all__ = ['TaskPool'] + + +class NullDict(UserDict): + + def __setitem__(self, key, value): + pass + + +class TaskPool(BasePool): + + def __init__(self, *args, **kwargs): + try: + import threadpool + except ImportError: + raise ImportError( + 'The threaded pool requires the threadpool module.') + self.WorkRequest = threadpool.WorkRequest + self.ThreadPool = threadpool.ThreadPool + super(TaskPool, self).__init__(*args, **kwargs) + + def on_start(self): + self._pool = self.ThreadPool(self.limit) + # threadpool stores all work requests until they are processed + # we don't need this dict, and it occupies way too much memory. + self._pool.workRequests = NullDict() + self._quick_put = self._pool.putRequest + self._quick_clear = self._pool._results_queue.queue.clear + + def on_stop(self): + self._pool.dismissWorkers(self.limit, do_join=True) + + def on_apply(self, target, args=None, kwargs=None, callback=None, + accept_callback=None, **_): + req = self.WorkRequest(apply_target, (target, args, kwargs, callback, + accept_callback)) + self._quick_put(req) + # threadpool also has callback support, + # but for some reason the callback is not triggered + # before you've collected the results. + # Clear the results (if any), so it doesn't grow too large. + self._quick_clear() + return req diff --git a/celery/contrib/__init__.py b/celery/contrib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/contrib/abortable.py b/celery/contrib/abortable.py new file mode 100644 index 0000000..dcdc615 --- /dev/null +++ b/celery/contrib/abortable.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +""" +========================= +Abortable tasks overview +========================= + +For long-running :class:`Task`'s, it can be desirable to support +aborting during execution. Of course, these tasks should be built to +support abortion specifically. + +The :class:`AbortableTask` serves as a base class for all :class:`Task` +objects that should support abortion by producers. + +* Producers may invoke the :meth:`abort` method on + :class:`AbortableAsyncResult` instances, to request abortion. + +* Consumers (workers) should periodically check (and honor!) the + :meth:`is_aborted` method at controlled points in their task's + :meth:`run` method. The more often, the better. + +The necessary intermediate communication is dealt with by the +:class:`AbortableTask` implementation. + +Usage example +------------- + +In the consumer: + +.. code-block:: python + + from __future__ import absolute_import + + from celery.contrib.abortable import AbortableTask + from celery.utils.log import get_task_logger + + from proj.celery import app + + logger = get_logger(__name__) + + @app.task(bind=True, base=AbortableTask) + def long_running_task(self): + results = [] + for i in range(100): + # check after every 5 iterations... + # (or alternatively, check when some timer is due) + if not i % 5: + if self.is_aborted(): + # respect aborted state, and terminate gracefully. + logger.warning('Task aborted') + return + value = do_something_expensive(i) + results.append(y) + logger.info('Task complete') + return results + +In the producer: + +.. 
code-block:: python + + from __future__ import absolute_import + + import time + + from proj.tasks import MyLongRunningTask + + def myview(request): + # result is of type AbortableAsyncResult + result = long_running_task.delay() + + # abort the task after 10 seconds + time.sleep(10) + result.abort() + +After the `result.abort()` call, the task execution is not +aborted immediately. In fact, it is not guaranteed to abort at all. Keep +checking `result.state` status, or call `result.get(timeout=)` to +have it block until the task is finished. + +.. note:: + + In order to abort tasks, there needs to be communication between the + producer and the consumer. This is currently implemented through the + database backend. Therefore, this class will only work with the + database backends. + +""" +from __future__ import absolute_import + +from celery import Task +from celery.result import AsyncResult + +__all__ = ['AbortableAsyncResult', 'AbortableTask'] + + +""" +Task States +----------- + +.. state:: ABORTED + +ABORTED +~~~~~~~ + +Task is aborted (typically by the producer) and should be +aborted as soon as possible. + +""" +ABORTED = 'ABORTED' + + +class AbortableAsyncResult(AsyncResult): + """Represents a abortable result. + + Specifically, this gives the `AsyncResult` a :meth:`abort()` method, + which sets the state of the underlying Task to `'ABORTED'`. + + """ + + def is_aborted(self): + """Return :const:`True` if the task is (being) aborted.""" + return self.state == ABORTED + + def abort(self): + """Set the state of the task to :const:`ABORTED`. + + Abortable tasks monitor their state at regular intervals and + terminate execution if so. + + Be aware that invoking this method does not guarantee when the + task will be aborted (or even if the task will be aborted at + all). + + """ + # TODO: store_result requires all four arguments to be set, + # but only status should be updated here + return self.backend.store_result(self.id, result=None, + status=ABORTED, traceback=None) + + +class AbortableTask(Task): + """A celery task that serves as a base class for all :class:`Task`'s + that support aborting during execution. + + All subclasses of :class:`AbortableTask` must call the + :meth:`is_aborted` method periodically and act accordingly when + the call evaluates to :const:`True`. + + """ + abstract = True + + def AsyncResult(self, task_id): + """Return the accompanying AbortableAsyncResult instance.""" + return AbortableAsyncResult(task_id, backend=self.backend) + + def is_aborted(self, **kwargs): + """Checks against the backend whether this + :class:`AbortableAsyncResult` is :const:`ABORTED`. + + Always return :const:`False` in case the `task_id` parameter + refers to a regular (non-abortable) :class:`Task`. + + Be aware that invoking this method will cause a hit in the + backend (for example a database query), so find a good balance + between calling it regularly (for responsiveness), but not too + often (for performance). + + """ + task_id = kwargs.get('task_id', self.request.id) + result = self.AsyncResult(task_id) + if not isinstance(result, AbortableAsyncResult): + return False + return result.is_aborted() diff --git a/celery/contrib/batches.py b/celery/contrib/batches.py new file mode 100644 index 0000000..8cabc6f --- /dev/null +++ b/celery/contrib/batches.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +""" +celery.contrib.batches +====================== + +Experimental task class that buffers messages and processes them as a list. + +.. 
warning:: + + For this to work you have to set + :setting:`CELERYD_PREFETCH_MULTIPLIER` to zero, or some value where + the final multiplied value is higher than ``flush_every``. + + In the future we hope to add the ability to direct batching tasks + to a channel with different QoS requirements than the task channel. + +**Simple Example** + +A click counter that flushes the buffer every 100 messages, and every +10 seconds. Does not do anything with the data, but can easily be modified +to store it in a database. + +.. code-block:: python + + # Flush after 100 messages, or 10 seconds. + @app.task(base=Batches, flush_every=100, flush_interval=10) + def count_click(requests): + from collections import Counter + count = Counter(request.kwargs['url'] for request in requests) + for url, count in count.items(): + print('>>> Clicks: {0} -> {1}'.format(url, count)) + + +Then you can ask for a click to be counted by doing:: + + >>> count_click.delay('http://example.com') + +**Example returning results** + +An interface to the Web of Trust API that flushes the buffer every 100 +messages, and every 10 seconds. + +.. code-block:: python + + import requests + from urlparse import urlparse + + from celery.contrib.batches import Batches + + wot_api_target = 'https://api.mywot.com/0.4/public_link_json' + + @app.task(base=Batches, flush_every=100, flush_interval=10) + def wot_api(requests): + sig = lambda url: url + responses = wot_api_real( + (sig(*request.args, **request.kwargs) for request in requests) + ) + # use mark_as_done to manually return response data + for response, request in zip(responses, requests): + app.backend.mark_as_done(request.id, response) + + + def wot_api_real(urls): + domains = [urlparse(url).netloc for url in urls] + response = requests.get( + wot_api_target, + params={'hosts': ('/').join(set(domains)) + '/'} + ) + return [response.json[domain] for domain in domains] + +Using the API is done as follows:: + + >>> wot_api.delay('http://example.com') + +.. note:: + + If you don't have an ``app`` instance then use the current app proxy + instead:: + + from celery import current_app + current_app.backend.mark_as_done(request.id, response) + +""" +from __future__ import absolute_import + +from itertools import count + +from celery.task import Task +from celery.five import Empty, Queue +from celery.utils.log import get_logger +from celery.worker.job import Request +from celery.utils import noop + +__all__ = ['Batches'] + +logger = get_logger(__name__) + + +def consume_queue(queue): + """Iterator yielding all immediately available items in a + :class:`Queue.Queue`. + + The iterator stops as soon as the queue raises :exc:`Queue.Empty`. + + *Examples* + + >>> q = Queue() + >>> map(q.put, range(4)) + >>> list(consume_queue(q)) + [0, 1, 2, 3] + >>> list(consume_queue(q)) + [] + + """ + get = queue.get_nowait + while 1: + try: + yield get() + except Empty: + break + + +def apply_batches_task(task, args, loglevel, logfile): + task.push_request(loglevel=loglevel, logfile=logfile) + try: + result = task(*args) + except Exception as exc: + result = None + logger.error('Error: %r', exc, exc_info=True) + finally: + task.pop_request() + return result + + +class SimpleRequest(object): + """Pickleable request.""" + + #: task id + id = None + + #: task name + name = None + + #: positional arguments + args = () + + #: keyword arguments + kwargs = {} + + #: message delivery information.
+ delivery_info = None + + #: worker node name + hostname = None + + def __init__(self, id, name, args, kwargs, delivery_info, hostname): + self.id = id + self.name = name + self.args = args + self.kwargs = kwargs + self.delivery_info = delivery_info + self.hostname = hostname + + @classmethod + def from_request(cls, request): + return cls(request.id, request.name, request.args, + request.kwargs, request.delivery_info, request.hostname) + + +class Batches(Task): + abstract = True + + #: Maximum number of message in buffer. + flush_every = 10 + + #: Timeout in seconds before buffer is flushed anyway. + flush_interval = 30 + + def __init__(self): + self._buffer = Queue() + self._count = count(1) + self._tref = None + self._pool = None + + def run(self, requests): + raise NotImplementedError('must implement run(requests)') + + def Strategy(self, task, app, consumer): + self._pool = consumer.pool + hostname = consumer.hostname + eventer = consumer.event_dispatcher + Req = Request + connection_errors = consumer.connection_errors + timer = consumer.timer + put_buffer = self._buffer.put + flush_buffer = self._do_flush + + def task_message_handler(message, body, ack, reject, callbacks, **kw): + request = Req(body, on_ack=ack, app=app, hostname=hostname, + events=eventer, task=task, + connection_errors=connection_errors, + delivery_info=message.delivery_info) + put_buffer(request) + + if self._tref is None: # first request starts flush timer. + self._tref = timer.call_repeatedly( + self.flush_interval, flush_buffer, + ) + + if not next(self._count) % self.flush_every: + flush_buffer() + + return task_message_handler + + def flush(self, requests): + return self.apply_buffer(requests, ([SimpleRequest.from_request(r) + for r in requests], )) + + def _do_flush(self): + logger.debug('Batches: Wake-up to flush buffer...') + requests = None + if self._buffer.qsize(): + requests = list(consume_queue(self._buffer)) + if requests: + logger.debug('Batches: Buffer complete: %s', len(requests)) + self.flush(requests) + if not requests: + logger.debug('Batches: Cancelling timer: Nothing in buffer.') + if self._tref: + self._tref.cancel() # cancel timer. + self._tref = None + + def apply_buffer(self, requests, args=(), kwargs={}): + acks_late = [], [] + [acks_late[r.task.acks_late].append(r) for r in requests] + assert requests and (acks_late[True] or acks_late[False]) + + def on_accepted(pid, time_accepted): + [req.acknowledge() for req in acks_late[False]] + + def on_return(result): + [req.acknowledge() for req in acks_late[True]] + + return self._pool.apply_async( + apply_batches_task, + (self, args, 0, None), + accept_callback=on_accepted, + callback=acks_late[True] and on_return or noop, + ) diff --git a/celery/contrib/methods.py b/celery/contrib/methods.py new file mode 100644 index 0000000..56aa7f4 --- /dev/null +++ b/celery/contrib/methods.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +""" +celery.contrib.methods +====================== + +Task decorator that supports creating tasks out of methods. + +Examples +-------- + +.. code-block:: python + + from celery.contrib.methods import task + + class X(object): + + @task() + def add(self, x, y): + return x + y + +or with any task decorator: + +.. code-block:: python + + from celery.contrib.methods import task_method + + class X(object): + + @app.task(filter=task_method) + def add(self, x, y): + return x + y + +.. 
note:: + + The task must use the new Task base class (:class:`celery.Task`), + and the old base class using classmethods (``celery.task.Task``, + ``celery.task.base.Task``). + + This means that you have to use the task decorator from a Celery app + instance, and not the old-API: + + .. code-block:: python + + + from celery import task # BAD + from celery.task import task # ALSO BAD + + # GOOD: + app = Celery(...) + + @app.task(filter=task_method) + def foo(self): pass + + # ALSO GOOD: + from celery import current_app + + @current_app.task(filter=task_method) + def foo(self): pass + + # ALSO GOOD: + from celery import shared_task + + @shared_task(filter=task_method) + def foo(self): pass + +Caveats +------- + +- Automatic naming won't be able to know what the class name is. + + The name will still be module_name + task_name, + so two methods with the same name in the same module will collide + so that only one task can run: + + .. code-block:: python + + class A(object): + + @task() + def add(self, x, y): + return x + y + + class B(object): + + @task() + def add(self, x, y): + return x + y + + would have to be written as: + + .. code-block:: python + + class A(object): + @task(name='A.add') + def add(self, x, y): + return x + y + + class B(object): + @task(name='B.add') + def add(self, x, y): + return x + y + +""" + +from __future__ import absolute_import + +from celery import current_app + +__all__ = ['task_method', 'task'] + + +class task_method(object): + + def __init__(self, task, *args, **kwargs): + self.task = task + + def __get__(self, obj, type=None): + if obj is None: + return self.task + task = self.task.__class__() + task.__self__ = obj + return task + + +def task(*args, **kwargs): + return current_app.task(*args, **dict(kwargs, filter=task_method)) diff --git a/celery/contrib/migrate.py b/celery/contrib/migrate.py new file mode 100644 index 0000000..e4a10e9 --- /dev/null +++ b/celery/contrib/migrate.py @@ -0,0 +1,365 @@ +# -*- coding: utf-8 -*- +""" + celery.contrib.migrate + ~~~~~~~~~~~~~~~~~~~~~~ + + Migration tools. + +""" +from __future__ import absolute_import, print_function, unicode_literals + +import socket + +from functools import partial +from itertools import cycle, islice + +from kombu import eventloop, Queue +from kombu.common import maybe_declare +from kombu.utils.encoding import ensure_bytes + +from celery.app import app_or_default +from celery.five import string, string_t +from celery.utils import worker_direct + +__all__ = ['StopFiltering', 'State', 'republish', 'migrate_task', + 'migrate_tasks', 'move', 'task_id_eq', 'task_id_in', + 'start_filter', 'move_task_by_id', 'move_by_idmap', + 'move_by_taskmap', 'move_direct', 'move_direct_by_id'] + +MOVING_PROGRESS_FMT = """\ +Moving task {state.filtered}/{state.strtotal}: \ +{body[task]}[{body[id]}]\ +""" + + +class StopFiltering(Exception): + pass + + +class State(object): + count = 0 + filtered = 0 + total_apx = 0 + + @property + def strtotal(self): + if not self.total_apx: + return '?' + return string(self.total_apx) + + def __repr__(self): + if self.filtered: + return '^{0.filtered}'.format(self) + return '{0.count}/{0.strtotal}'.format(self) + + +def republish(producer, message, exchange=None, routing_key=None, + remove_props=['application_headers', + 'content_type', + 'content_encoding', + 'headers']): + body = ensure_bytes(message.body) # use raw message body. 
+ info, headers, props = (message.delivery_info, + message.headers, message.properties) + exchange = info['exchange'] if exchange is None else exchange + routing_key = info['routing_key'] if routing_key is None else routing_key + ctype, enc = message.content_type, message.content_encoding + # remove compression header, as this will be inserted again + # when the message is recompressed. + compression = headers.pop('compression', None) + + for key in remove_props: + props.pop(key, None) + + producer.publish(ensure_bytes(body), exchange=exchange, + routing_key=routing_key, compression=compression, + headers=headers, content_type=ctype, + content_encoding=enc, **props) + + +def migrate_task(producer, body_, message, queues=None): + info = message.delivery_info + queues = {} if queues is None else queues + republish(producer, message, + exchange=queues.get(info['exchange']), + routing_key=queues.get(info['routing_key'])) + + +def filter_callback(callback, tasks): + + def filtered(body, message): + if tasks and body['task'] not in tasks: + return + + return callback(body, message) + return filtered + + +def migrate_tasks(source, dest, migrate=migrate_task, app=None, + queues=None, **kwargs): + app = app_or_default(app) + queues = prepare_queues(queues) + producer = app.amqp.TaskProducer(dest) + migrate = partial(migrate, producer, queues=queues) + + def on_declare_queue(queue): + new_queue = queue(producer.channel) + new_queue.name = queues.get(queue.name, queue.name) + if new_queue.routing_key == queue.name: + new_queue.routing_key = queues.get(queue.name, + new_queue.routing_key) + if new_queue.exchange.name == queue.name: + new_queue.exchange.name = queues.get(queue.name, queue.name) + new_queue.declare() + + return start_filter(app, source, migrate, queues=queues, + on_declare_queue=on_declare_queue, **kwargs) + + +def _maybe_queue(app, q): + if isinstance(q, string_t): + return app.amqp.queues[q] + return q + + +def move(predicate, connection=None, exchange=None, routing_key=None, + source=None, app=None, callback=None, limit=None, transform=None, + **kwargs): + """Find tasks by filtering them and move the tasks to a new queue. + + :param predicate: Filter function used to decide which messages + to move. Must accept the standard signature of ``(body, message)`` + used by Kombu consumer callbacks. If the predicate wants the message + to be moved it must return either: + + 1) a tuple of ``(exchange, routing_key)``, or + + 2) a :class:`~kombu.entity.Queue` instance, or + + 3) any other true value which means the specified + ``exchange`` and ``routing_key`` arguments will be used. + + :keyword connection: Custom connection to use. + :keyword source: Optional list of source queues to use instead of the + default (which is the queues in :setting:`CELERY_QUEUES`). + This list can also contain new :class:`~kombu.entity.Queue` instances. + :keyword exchange: Default destination exchange. + :keyword routing_key: Default destination routing key. + :keyword limit: Limit number of messages to filter. + :keyword callback: Callback called after message moved, + with signature ``(state, body, message)``. + :keyword transform: Optional function to transform the return + value (destination) of the filter function. + + Also supports the same keyword arguments as :func:`start_filter`. + + To demonstrate, the :func:`move_task_by_id` operation can be implemented + like this: + + .. 
code-block:: python + + def is_wanted_task(body, message): + if body['id'] == wanted_id: + return Queue('foo', exchange=Exchange('foo'), + routing_key='foo') + + move(is_wanted_task) + + or with a transform: + + .. code-block:: python + + def transform(value): + if isinstance(value, string_t): + return Queue(value, Exchange(value), value) + return value + + move(is_wanted_task, transform=transform) + + The predicate may also return a tuple of ``(exchange, routing_key)`` + to specify the destination to where the task should be moved, + or a :class:`~kombu.entitiy.Queue` instance. + Any other true value means that the task will be moved to the + default exchange/routing_key. + + """ + app = app_or_default(app) + queues = [_maybe_queue(app, queue) for queue in source or []] or None + with app.connection_or_acquire(connection, pool=False) as conn: + producer = app.amqp.TaskProducer(conn) + state = State() + + def on_task(body, message): + ret = predicate(body, message) + if ret: + if transform: + ret = transform(ret) + if isinstance(ret, Queue): + maybe_declare(ret, conn.default_channel) + ex, rk = ret.exchange.name, ret.routing_key + else: + ex, rk = expand_dest(ret, exchange, routing_key) + republish(producer, message, + exchange=ex, routing_key=rk) + message.ack() + + state.filtered += 1 + if callback: + callback(state, body, message) + if limit and state.filtered >= limit: + raise StopFiltering() + + return start_filter(app, conn, on_task, consume_from=queues, **kwargs) + + +def expand_dest(ret, exchange, routing_key): + try: + ex, rk = ret + except (TypeError, ValueError): + ex, rk = exchange, routing_key + return ex, rk + + +def task_id_eq(task_id, body, message): + return body['id'] == task_id + + +def task_id_in(ids, body, message): + return body['id'] in ids + + +def prepare_queues(queues): + if isinstance(queues, string_t): + queues = queues.split(',') + if isinstance(queues, list): + queues = dict(tuple(islice(cycle(q.split(':')), None, 2)) + for q in queues) + if queues is None: + queues = {} + return queues + + +def start_filter(app, conn, filter, limit=None, timeout=1.0, + ack_messages=False, tasks=None, queues=None, + callback=None, forever=False, on_declare_queue=None, + consume_from=None, state=None, accept=None, **kwargs): + state = state or State() + queues = prepare_queues(queues) + consume_from = [_maybe_queue(app, q) + for q in consume_from or list(queues)] + if isinstance(tasks, string_t): + tasks = set(tasks.split(',')) + if tasks is None: + tasks = set([]) + + def update_state(body, message): + state.count += 1 + if limit and state.count >= limit: + raise StopFiltering() + + def ack_message(body, message): + message.ack() + + consumer = app.amqp.TaskConsumer(conn, queues=consume_from, accept=accept) + + if tasks: + filter = filter_callback(filter, tasks) + update_state = filter_callback(update_state, tasks) + ack_message = filter_callback(ack_message, tasks) + + consumer.register_callback(filter) + consumer.register_callback(update_state) + if ack_messages: + consumer.register_callback(ack_message) + if callback is not None: + callback = partial(callback, state) + if tasks: + callback = filter_callback(callback, tasks) + consumer.register_callback(callback) + + # declare all queues on the new broker. 
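+ # (the passive queue_declare below is only used to count the messages + # waiting in each source queue, so progress can be reported through + # State.total_apx)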
+ for queue in consumer.queues: + if queues and queue.name not in queues: + continue + if on_declare_queue is not None: + on_declare_queue(queue) + try: + _, mcount, _ = queue(consumer.channel).queue_declare(passive=True) + if mcount: + state.total_apx += mcount + except conn.channel_errors: + pass + + # start migrating messages. + with consumer: + try: + for _ in eventloop(conn, # pragma: no cover + timeout=timeout, ignore_timeouts=forever): + pass + except socket.timeout: + pass + except StopFiltering: + pass + return state + + +def move_task_by_id(task_id, dest, **kwargs): + """Find a task by id and move it to another queue. + + :param task_id: Id of task to move. + :param dest: Destination queue. + + Also supports the same keyword arguments as :func:`move`. + + """ + return move_by_idmap({task_id: dest}, **kwargs) + + +def move_by_idmap(map, **kwargs): + """Moves tasks by matching from a ``task_id: queue`` mapping, + where ``queue`` is a queue to move the task to. + + Example:: + + >>> move_by_idmap({ + ... '5bee6e82-f4ac-468e-bd3d-13e8600250bc': Queue('name'), + ... 'ada8652d-aef3-466b-abd2-becdaf1b82b3': Queue('name'), + ... '3a2b140d-7db1-41ba-ac90-c36a0ef4ab1f': Queue('name')}, + ... queues=['hipri']) + + """ + def task_id_in_map(body, message): + return map.get(body['id']) + + # adding the limit means that we don't have to consume any more + # when we've found everything. + return move(task_id_in_map, limit=len(map), **kwargs) + + +def move_by_taskmap(map, **kwargs): + """Moves tasks by matching from a ``task_name: queue`` mapping, + where ``queue`` is the queue to move the task to. + + Example:: + + >>> move_by_taskmap({ + ... 'tasks.add': Queue('name'), + ... 'tasks.mul': Queue('name'), + ... }) + + """ + + def task_name_in_map(body, message): + return map.get(body['task']) # <- name of task + + return move(task_name_in_map, **kwargs) + + +def filter_status(state, body, message, **kwargs): + print(MOVING_PROGRESS_FMT.format(state=state, body=body, **kwargs)) + + +move_direct = partial(move, transform=worker_direct) +move_direct_by_id = partial(move_task_by_id, transform=worker_direct) +move_direct_by_idmap = partial(move_by_idmap, transform=worker_direct) +move_direct_by_taskmap = partial(move_by_taskmap, transform=worker_direct) diff --git a/celery/contrib/rdb.py b/celery/contrib/rdb.py new file mode 100644 index 0000000..3e9f55b --- /dev/null +++ b/celery/contrib/rdb.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- +""" +celery.contrib.rdb +================== + +Remote debugger for Celery tasks running in multiprocessing pool workers. +Inspired by http://snippets.dzone.com/posts/show/7248 + +**Usage** + +.. code-block:: python + + from celery.contrib import rdb + from celery import task + + @task() + def add(x, y): + result = x + y + rdb.set_trace() + return result + + +**Environment Variables** + +.. envvar:: CELERY_RDB_HOST + + Hostname to bind to. Default is '127.0.01', which means the socket + will only be accessible from the local host. + +.. envvar:: CELERY_RDB_PORT + + Base port to bind to. Default is 6899. + The debugger will try to find an available port starting from the + base port. The selected port will be logged by the worker. 
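+ +For example, a minimal sketch (illustrative values only; the variables +are normally exported in the shell before the worker starts, but since +they are read when this module is imported they can also be set from +Python first):: + + import os + os.environ.setdefault('CELERY_RDB_HOST', '0.0.0.0') + os.environ.setdefault('CELERY_RDB_PORT', '6900') + + from celery.contrib import rdb  # reads the variables at import time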
+ +""" +from __future__ import absolute_import, print_function + +import errno +import os +import socket +import sys + +from pdb import Pdb + +from billiard import current_process + +from celery.five import range +from celery.platforms import ignore_errno + +__all__ = ['CELERY_RDB_HOST', 'CELERY_RDB_PORT', 'default_port', + 'Rdb', 'debugger', 'set_trace'] + +default_port = 6899 + +CELERY_RDB_HOST = os.environ.get('CELERY_RDB_HOST') or '127.0.0.1' +CELERY_RDB_PORT = int(os.environ.get('CELERY_RDB_PORT') or default_port) + +#: Holds the currently active debugger. +_current = [None] + +_frame = getattr(sys, '_getframe') + +NO_AVAILABLE_PORT = """\ +{self.ident}: Couldn't find an available port. + +Please specify one using the CELERY_RDB_PORT environment variable. +""" + +BANNER = """\ +{self.ident}: Please telnet into {self.host} {self.port}. + +Type `exit` in session to continue. + +{self.ident}: Waiting for client... +""" + +SESSION_STARTED = '{self.ident}: Now in session with {self.remote_addr}.' +SESSION_ENDED = '{self.ident}: Session with {self.remote_addr} ended.' + + +class Rdb(Pdb): + me = 'Remote Debugger' + _prev_outs = None + _sock = None + + def __init__(self, host=CELERY_RDB_HOST, port=CELERY_RDB_PORT, + port_search_limit=100, port_skew=+0, out=sys.stdout): + self.active = True + self.out = out + + self._prev_handles = sys.stdin, sys.stdout + + self._sock, this_port = self.get_avail_port( + host, port, port_search_limit, port_skew, + ) + self._sock.setblocking(1) + self._sock.listen(1) + self.ident = '{0}:{1}'.format(self.me, this_port) + self.host = host + self.port = this_port + self.say(BANNER.format(self=self)) + + self._client, address = self._sock.accept() + self._client.setblocking(1) + self.remote_addr = ':'.join(str(v) for v in address) + self.say(SESSION_STARTED.format(self=self)) + self._handle = sys.stdin = sys.stdout = self._client.makefile('rw') + Pdb.__init__(self, completekey='tab', + stdin=self._handle, stdout=self._handle) + + def get_avail_port(self, host, port, search_limit=100, skew=+0): + try: + _, skew = current_process().name.split('-') + skew = int(skew) + except ValueError: + pass + this_port = None + for i in range(search_limit): + _sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + this_port = port + skew + i + try: + _sock.bind((host, this_port)) + except socket.error as exc: + if exc.errno in [errno.EADDRINUSE, errno.EINVAL]: + continue + raise + else: + return _sock, this_port + else: + raise Exception(NO_AVAILABLE_PORT.format(self=self)) + + def say(self, m): + print(m, file=self.out) + + def _close_session(self): + self.stdin, self.stdout = sys.stdin, sys.stdout = self._prev_handles + self._handle.close() + self._client.close() + self._sock.close() + self.active = False + self.say(SESSION_ENDED.format(self=self)) + + def do_continue(self, arg): + self._close_session() + self.set_continue() + return 1 + do_c = do_cont = do_continue + + def do_quit(self, arg): + self._close_session() + self.set_quit() + return 1 + do_q = do_exit = do_quit + + def set_trace(self, frame=None): + if frame is None: + frame = _frame().f_back + with ignore_errno(errno.ECONNRESET): + Pdb.set_trace(self, frame) + + def set_quit(self): + # this raises a BdbQuit exception that we are unable to catch. 
+ sys.settrace(None) + + +def debugger(): + """Return the current debugger instance (if any), + or creates a new one.""" + rdb = _current[0] + if rdb is None or not rdb.active: + rdb = _current[0] = Rdb() + return rdb + + +def set_trace(frame=None): + """Set breakpoint at current location, or a specified frame""" + if frame is None: + frame = _frame().f_back + return debugger().set_trace(frame) diff --git a/celery/contrib/sphinx.py b/celery/contrib/sphinx.py new file mode 100644 index 0000000..d22d82f --- /dev/null +++ b/celery/contrib/sphinx.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +""" +celery.contrib.sphinx +===================== + +Sphinx documentation plugin + +**Usage** + +Add the extension to your :file:`docs/conf.py` configuration module: + +.. code-block:: python + + extensions = (..., + 'celery.contrib.sphinx') + +If you would like to change the prefix for tasks in reference documentation +then you can change the ``celery_task_prefix`` configuration value: + +.. code-block:: python + + celery_task_prefix = '(task)' # < default + + +With the extension installed `autodoc` will automatically find +task decorated objects and generate the correct (as well as +add a ``(task)`` prefix), and you can also refer to the tasks +using `:task:proj.tasks.add` syntax. + +Use ``.. autotask::`` to manually document a task. + +""" +from __future__ import absolute_import + +from inspect import formatargspec, getargspec + +from sphinx.domains.python import PyModulelevel +from sphinx.ext.autodoc import FunctionDocumenter + +from celery.app.task import BaseTask + + +class TaskDocumenter(FunctionDocumenter): + objtype = 'task' + member_order = 11 + + @classmethod + def can_document_member(cls, member, membername, isattr, parent): + return isinstance(member, BaseTask) and getattr(member, '__wrapped__') + + def format_args(self): + wrapped = getattr(self.object, '__wrapped__') + if wrapped is not None: + argspec = getargspec(wrapped) + fmt = formatargspec(*argspec) + fmt = fmt.replace('\\', '\\\\') + return fmt + return '' + + def document_members(self, all_members=False): + pass + + +class TaskDirective(PyModulelevel): + + def get_signature_prefix(self, sig): + return self.env.config.celery_task_prefix + + +def setup(app): + app.add_autodocumenter(TaskDocumenter) + app.domains['py'].directives['task'] = TaskDirective + app.add_config_value('celery_task_prefix', '(task)', True) diff --git a/celery/datastructures.py b/celery/datastructures.py new file mode 100644 index 0000000..9c36a39 --- /dev/null +++ b/celery/datastructures.py @@ -0,0 +1,667 @@ +# -*- coding: utf-8 -*- +""" + celery.datastructures + ~~~~~~~~~~~~~~~~~~~~~ + + Custom types and data structures. 
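+ +For example, :class:`AttributeDict` exposes mapping keys as attributes +(an illustrative doctest):: + + >>> d = AttributeDict(enabled=True) + >>> d.enabled + True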
+ +""" +from __future__ import absolute_import, print_function, unicode_literals + +import sys +import time + +from collections import defaultdict, Mapping, MutableMapping, MutableSet +from heapq import heappush, heappop +from functools import partial +from itertools import chain + +from billiard.einfo import ExceptionInfo # noqa +from kombu.utils.encoding import safe_str +from kombu.utils.limits import TokenBucket # noqa + +from celery.five import items +from celery.utils.functional import LRUCache, first, uniq # noqa + +try: + from django.utils.functional import LazyObject, LazySettings +except ImportError: + class LazyObject(object): # noqa + pass + LazySettings = LazyObject # noqa + +DOT_HEAD = """ +{IN}{type} {id} {{ +{INp}graph [{attrs}] +""" +DOT_ATTR = '{name}={value}' +DOT_NODE = '{INp}"{0}" [{attrs}]' +DOT_EDGE = '{INp}"{0}" {dir} "{1}" [{attrs}]' +DOT_ATTRSEP = ', ' +DOT_DIRS = {'graph': '--', 'digraph': '->'} +DOT_TAIL = '{IN}}}' + +__all__ = ['GraphFormatter', 'CycleError', 'DependencyGraph', + 'AttributeDictMixin', 'AttributeDict', 'DictAttribute', + 'ConfigurationView', 'LimitedSet'] + + +def force_mapping(m): + if isinstance(m, (LazyObject, LazySettings)): + m = m._wrapped + return DictAttribute(m) if not isinstance(m, Mapping) else m + + +class GraphFormatter(object): + _attr = DOT_ATTR.strip() + _node = DOT_NODE.strip() + _edge = DOT_EDGE.strip() + _head = DOT_HEAD.strip() + _tail = DOT_TAIL.strip() + _attrsep = DOT_ATTRSEP + _dirs = dict(DOT_DIRS) + + scheme = { + 'shape': 'box', + 'arrowhead': 'vee', + 'style': 'filled', + 'fontname': 'HelveticaNeue', + } + edge_scheme = { + 'color': 'darkseagreen4', + 'arrowcolor': 'black', + 'arrowsize': 0.7, + } + node_scheme = {'fillcolor': 'palegreen3', 'color': 'palegreen4'} + term_scheme = {'fillcolor': 'palegreen1', 'color': 'palegreen2'} + graph_scheme = {'bgcolor': 'mintcream'} + + def __init__(self, root=None, type=None, id=None, + indent=0, inw=' ' * 4, **scheme): + self.id = id or 'dependencies' + self.root = root + self.type = type or 'digraph' + self.direction = self._dirs[self.type] + self.IN = inw * (indent or 0) + self.INp = self.IN + inw + self.scheme = dict(self.scheme, **scheme) + self.graph_scheme = dict(self.graph_scheme, root=self.label(self.root)) + + def attr(self, name, value): + value = '"{0}"'.format(value) + return self.FMT(self._attr, name=name, value=value) + + def attrs(self, d, scheme=None): + d = dict(self.scheme, **dict(scheme, **d or {}) if scheme else d) + return self._attrsep.join( + safe_str(self.attr(k, v)) for k, v in items(d) + ) + + def head(self, **attrs): + return self.FMT( + self._head, id=self.id, type=self.type, + attrs=self.attrs(attrs, self.graph_scheme), + ) + + def tail(self): + return self.FMT(self._tail) + + def label(self, obj): + return obj + + def node(self, obj, **attrs): + return self.draw_node(obj, self.node_scheme, attrs) + + def terminal_node(self, obj, **attrs): + return self.draw_node(obj, self.term_scheme, attrs) + + def edge(self, a, b, **attrs): + return self.draw_edge(a, b, **attrs) + + def _enc(self, s): + return s.encode('utf-8', 'ignore') + + def FMT(self, fmt, *args, **kwargs): + return self._enc(fmt.format( + *args, **dict(kwargs, IN=self.IN, INp=self.INp) + )) + + def draw_edge(self, a, b, scheme=None, attrs=None): + return self.FMT( + self._edge, self.label(a), self.label(b), + dir=self.direction, attrs=self.attrs(attrs, self.edge_scheme), + ) + + def draw_node(self, obj, scheme=None, attrs=None): + return self.FMT( + self._node, self.label(obj), 
attrs=self.attrs(attrs, scheme), + ) + + +class CycleError(Exception): + """A cycle was detected in an acyclic graph.""" + + +class DependencyGraph(object): + """A directed acyclic graph of objects and their dependencies. + + Supports a robust topological sort + to detect the order in which they must be handled. + + Takes an optional iterator of ``(obj, dependencies)`` + tuples to build the graph from. + + .. warning:: + + Does not support cycle detection. + + """ + + def __init__(self, it=None, formatter=None): + self.formatter = formatter or GraphFormatter() + self.adjacent = {} + if it is not None: + self.update(it) + + def add_arc(self, obj): + """Add an object to the graph.""" + self.adjacent.setdefault(obj, []) + + def add_edge(self, A, B): + """Add an edge from object ``A`` to object ``B`` + (``A`` depends on ``B``).""" + self[A].append(B) + + def connect(self, graph): + """Add nodes from another graph.""" + self.adjacent.update(graph.adjacent) + + def topsort(self): + """Sort the graph topologically. + + :returns: a list of objects in the order + in which they must be handled. + + """ + graph = DependencyGraph() + components = self._tarjan72() + + NC = dict((node, component) + for component in components + for node in component) + for component in components: + graph.add_arc(component) + for node in self: + node_c = NC[node] + for successor in self[node]: + successor_c = NC[successor] + if node_c != successor_c: + graph.add_edge(node_c, successor_c) + return [t[0] for t in graph._khan62()] + + def valency_of(self, obj): + """Return the valency (degree) of a vertex in the graph.""" + try: + l = [len(self[obj])] + except KeyError: + return 0 + for node in self[obj]: + l.append(self.valency_of(node)) + return sum(l) + + def update(self, it): + """Update the graph with data from a list + of ``(obj, dependencies)`` tuples.""" + tups = list(it) + for obj, _ in tups: + self.add_arc(obj) + for obj, deps in tups: + for dep in deps: + self.add_edge(obj, dep) + + def edges(self): + """Return generator that yields for all edges in the graph.""" + return (obj for obj, adj in items(self) if adj) + + def _khan62(self): + """Khans simple topological sort algorithm from '62 + + See http://en.wikipedia.org/wiki/Topological_sorting + + """ + count = defaultdict(lambda: 0) + result = [] + + for node in self: + for successor in self[node]: + count[successor] += 1 + ready = [node for node in self if not count[node]] + + while ready: + node = ready.pop() + result.append(node) + + for successor in self[node]: + count[successor] -= 1 + if count[successor] == 0: + ready.append(successor) + result.reverse() + return result + + def _tarjan72(self): + """Tarjan's algorithm to find strongly connected components. + + See http://bit.ly/vIMv3h. + + """ + result, stack, low = [], [], {} + + def visit(node): + if node in low: + return + num = len(low) + low[node] = num + stack_pos = len(stack) + stack.append(node) + + for successor in self[node]: + visit(successor) + low[node] = min(low[node], low[successor]) + + if num == low[node]: + component = tuple(stack[stack_pos:]) + stack[stack_pos:] = [] + result.append(component) + for item in component: + low[item] = len(self) + + for node in self: + visit(node) + + return result + + def to_dot(self, fh, formatter=None): + """Convert the graph to DOT format. + + :param fh: A file, or a file-like object to write the graph to. 
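+ + Example (illustrative; ``graph`` stands for any populated + :class:`DependencyGraph` instance):: + + with open('deps.dot', 'w') as fh: + graph.to_dot(fh)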
+ + """ + seen = set() + draw = formatter or self.formatter + P = partial(print, file=fh) + + def if_not_seen(fun, obj): + if draw.label(obj) not in seen: + P(fun(obj)) + seen.add(draw.label(obj)) + + P(draw.head()) + for obj, adjacent in items(self): + if not adjacent: + if_not_seen(draw.terminal_node, obj) + for req in adjacent: + if_not_seen(draw.node, obj) + P(draw.edge(obj, req)) + P(draw.tail()) + + def format(self, obj): + return self.formatter(obj) if self.formatter else obj + + def __iter__(self): + return iter(self.adjacent) + + def __getitem__(self, node): + return self.adjacent[node] + + def __len__(self): + return len(self.adjacent) + + def __contains__(self, obj): + return obj in self.adjacent + + def _iterate_items(self): + return items(self.adjacent) + items = iteritems = _iterate_items + + def __repr__(self): + return '\n'.join(self.repr_node(N) for N in self) + + def repr_node(self, obj, level=1, fmt='{0}({1})'): + output = [fmt.format(obj, self.valency_of(obj))] + if obj in self: + for other in self[obj]: + d = fmt.format(other, self.valency_of(other)) + output.append(' ' * level + d) + output.extend(self.repr_node(other, level + 1).split('\n')[1:]) + return '\n'.join(output) + + +class AttributeDictMixin(object): + """Augment classes with a Mapping interface by adding attribute access. + + I.e. `d.key -> d[key]`. + + """ + + def __getattr__(self, k): + """`d.key -> d[key]`""" + try: + return self[k] + except KeyError: + raise AttributeError( + '{0!r} object has no attribute {1!r}'.format( + type(self).__name__, k)) + + def __setattr__(self, key, value): + """`d[key] = value -> d.key = value`""" + self[key] = value + + +class AttributeDict(dict, AttributeDictMixin): + """Dict subclass with attribute access.""" + pass + + +class DictAttribute(object): + """Dict interface to attributes. + + `obj[k] -> obj.k` + `obj[k] = val -> obj.k = val` + + """ + obj = None + + def __init__(self, obj): + object.__setattr__(self, 'obj', obj) + + def __getattr__(self, key): + return getattr(self.obj, key) + + def __setattr__(self, key, value): + return setattr(self.obj, key, value) + + def get(self, key, default=None): + try: + return self[key] + except KeyError: + return default + + def setdefault(self, key, default): + try: + return self[key] + except KeyError: + self[key] = default + return default + + def __getitem__(self, key): + try: + return getattr(self.obj, key) + except AttributeError: + raise KeyError(key) + + def __setitem__(self, key, value): + setattr(self.obj, key, value) + + def __contains__(self, key): + return hasattr(self.obj, key) + + def _iterate_keys(self): + return iter(dir(self.obj)) + iterkeys = _iterate_keys + + def __iter__(self): + return self._iterate_keys() + + def _iterate_items(self): + for key in self._iterate_keys(): + yield key, getattr(self.obj, key) + iteritems = _iterate_items + + def _iterate_values(self): + for key in self._iterate_keys(): + yield getattr(self.obj, key) + itervalues = _iterate_values + + if sys.version_info[0] == 3: # pragma: no cover + items = _iterate_items + keys = _iterate_keys + values = _iterate_values + else: + + def keys(self): + return list(self) + + def items(self): + return list(self._iterate_items()) + + def values(self): + return list(self._iterate_values()) +MutableMapping.register(DictAttribute) + + +class ConfigurationView(AttributeDictMixin): + """A view over an applications configuration dicts. + + Custom (but older) version of :class:`collections.ChainMap`. 
+ + If the key does not exist in ``changes``, the ``defaults`` dicts + are consulted. + + :param changes: Dict containing changes to the configuration. + :param defaults: List of dicts containing the default configuration. + + """ + changes = None + defaults = None + _order = None + + def __init__(self, changes, defaults): + self.__dict__.update(changes=changes, defaults=defaults, + _order=[changes] + defaults) + + def add_defaults(self, d): + d = force_mapping(d) + self.defaults.insert(0, d) + self._order.insert(1, d) + + def __getitem__(self, key): + for d in self._order: + try: + return d[key] + except KeyError: + pass + raise KeyError(key) + + def __setitem__(self, key, value): + self.changes[key] = value + + def first(self, *keys): + return first(None, (self.get(key) for key in keys)) + + def get(self, key, default=None): + try: + return self[key] + except KeyError: + return default + + def clear(self): + """Remove all changes, but keep defaults.""" + self.changes.clear() + + def setdefault(self, key, default): + try: + return self[key] + except KeyError: + self[key] = default + return default + + def update(self, *args, **kwargs): + return self.changes.update(*args, **kwargs) + + def __contains__(self, key): + return any(key in m for m in self._order) + + def __bool__(self): + return any(self._order) + __nonzero__ = __bool__ # Py2 + + def __repr__(self): + return repr(dict(items(self))) + + def __iter__(self): + return self._iterate_keys() + + def __len__(self): + # The logic for iterating keys includes uniq(), + # so to be safe we count by explicitly iterating + return len(set().union(*self._order)) + + def _iter(self, op): + # defaults must be first in the stream, so values in + # changes takes precedence. + return chain(*[op(d) for d in reversed(self._order)]) + + def _iterate_keys(self): + return uniq(self._iter(lambda d: d)) + iterkeys = _iterate_keys + + def _iterate_items(self): + return ((key, self[key]) for key in self) + iteritems = _iterate_items + + def _iterate_values(self): + return (self[key] for key in self) + itervalues = _iterate_values + + if sys.version_info[0] == 3: # pragma: no cover + keys = _iterate_keys + items = _iterate_items + values = _iterate_values + + else: # noqa + def keys(self): + return list(self._iterate_keys()) + + def items(self): + return list(self._iterate_items()) + + def values(self): + return list(self._iterate_values()) + +MutableMapping.register(ConfigurationView) + + +class LimitedSet(object): + """Kind-of Set with limitations. + + Good for when you need to test for membership (`a in set`), + but the list might become too big. + + :keyword maxlen: Maximum number of members before we start + evicting expired members. + :keyword expires: Time in seconds, before a membership expires. + + """ + + def __init__(self, maxlen=None, expires=None, data=None, heap=None): + self.maxlen = maxlen + self.expires = expires + self._data = {} if data is None else data + self._heap = [] if heap is None else heap + # make shortcuts + self.__len__ = self._heap.__len__ + self.__iter__ = self._heap.__iter__ + self.__contains__ = self._data.__contains__ + + def add(self, value, now=time.time): + """Add a new member.""" + # offset is there to modify the length of the list, + # this way we can expire an item before inserting the value, + # and it will end up in correct order. 
+ self.purge(1, offset=1) + inserted = now() + self._data[value] = inserted + heappush(self._heap, (inserted, value)) + + def clear(self): + """Remove all members""" + self._data.clear() + self._heap[:] = [] + + def discard(self, value): + """Remove membership by finding value.""" + try: + itime = self._data[value] + except KeyError: + return + try: + self._heap.remove((value, itime)) + except ValueError: + pass + self._data.pop(value, None) + pop_value = discard # XXX compat + + def purge(self, limit=None, offset=0, now=time.time): + """Purge expired items.""" + H, maxlen = self._heap, self.maxlen + if not maxlen: + return + + # If the data/heap gets corrupted and limit is None + # this will go into an infinite loop, so limit must + # have a value to guard the loop. + limit = len(self) + offset if limit is None else limit + + i = 0 + while len(self) + offset > maxlen: + if i >= limit: + break + try: + item = heappop(H) + except IndexError: + break + if self.expires: + if now() < item[0] + self.expires: + heappush(H, item) + break + try: + self._data.pop(item[1]) + except KeyError: # out of sync with heap + pass + i += 1 + + def update(self, other, heappush=heappush): + if isinstance(other, LimitedSet): + self._data.update(other._data) + self._heap.extend(other._heap) + self._heap.sort() + else: + for obj in other: + self.add(obj) + + def as_dict(self): + return self._data + + def __eq__(self, other): + return self._heap == other._heap + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return 'LimitedSet({0})'.format(len(self)) + + def __iter__(self): + return (item[1] for item in self._heap) + + def __len__(self): + return len(self._heap) + + def __contains__(self, key): + return key in self._data + + def __reduce__(self): + return self.__class__, ( + self.maxlen, self.expires, self._data, self._heap, + ) +MutableSet.register(LimitedSet) diff --git a/celery/events/__init__.py b/celery/events/__init__.py new file mode 100644 index 0000000..931f395 --- /dev/null +++ b/celery/events/__init__.py @@ -0,0 +1,407 @@ +# -*- coding: utf-8 -*- +""" + celery.events + ~~~~~~~~~~~~~ + + Events is a stream of messages sent for certain actions occurring + in the worker (and clients if :setting:`CELERY_SEND_TASK_SENT_EVENT` + is enabled), used for monitoring purposes. + +""" +from __future__ import absolute_import + +import os +import time +import threading +import warnings + +from collections import deque +from contextlib import contextmanager +from copy import copy +from operator import itemgetter + +from kombu import Exchange, Queue, Producer +from kombu.connection import maybe_channel +from kombu.mixins import ConsumerMixin +from kombu.utils import cached_property + +from celery.app import app_or_default +from celery.utils import anon_nodename, uuid +from celery.utils.functional import dictfilter +from celery.utils.timeutils import adjust_timestamp, utcoffset, maybe_s_to_ms + +__all__ = ['Events', 'Event', 'EventDispatcher', 'EventReceiver'] + +event_exchange = Exchange('celeryev', type='topic') + +_TZGETTER = itemgetter('utcoffset', 'timestamp') + +W_YAJL = """ +anyjson is currently using the yajl library. +This json implementation is broken, it severely truncates floats +so timestamps will not work. + +Please uninstall yajl or force anyjson to use a different library. 
+""" + +CLIENT_CLOCK_SKEW = -1 + + +def get_exchange(conn): + ex = copy(event_exchange) + if conn.transport.driver_type == 'redis': + # quick hack for Issue #436 + ex.type = 'fanout' + return ex + + +def Event(type, _fields=None, __dict__=dict, __now__=time.time, **fields): + """Create an event. + + An event is a dictionary, the only required field is ``type``. + A ``timestamp`` field will be set to the current time if not provided. + + """ + event = __dict__(_fields, **fields) if _fields else fields + if 'timestamp' not in event: + event.update(timestamp=__now__(), type=type) + else: + event['type'] = type + return event + + +def group_from(type): + """Get the group part of an event type name. + + E.g.:: + + >>> group_from('task-sent') + 'task' + + >>> group_from('custom-my-event') + 'custom' + + """ + return type.split('-', 1)[0] + + +class EventDispatcher(object): + """Dispatches event messages. + + :param connection: Connection to the broker. + + :keyword hostname: Hostname to identify ourselves as, + by default uses the hostname returned by + :func:`~celery.utils.anon_nodename`. + + :keyword groups: List of groups to send events for. :meth:`send` will + ignore send requests to groups not in this list. + If this is :const:`None`, all events will be sent. Example groups + include ``"task"`` and ``"worker"``. + + :keyword enabled: Set to :const:`False` to not actually publish any events, + making :meth:`send` a noop operation. + + :keyword channel: Can be used instead of `connection` to specify + an exact channel to use when sending events. + + :keyword buffer_while_offline: If enabled events will be buffered + while the connection is down. :meth:`flush` must be called + as soon as the connection is re-established. + + You need to :meth:`close` this after use. + + """ + DISABLED_TRANSPORTS = set(['sql']) + + app = None + + # set of callbacks to be called when :meth:`enabled`. + on_enabled = None + + # set of callbacks to be called when :meth:`disabled`. 
+ on_disabled = None + + def __init__(self, connection=None, hostname=None, enabled=True, + channel=None, buffer_while_offline=True, app=None, + serializer=None, groups=None): + self.app = app_or_default(app or self.app) + self.connection = connection + self.channel = channel + self.hostname = hostname or anon_nodename() + self.buffer_while_offline = buffer_while_offline + self.mutex = threading.Lock() + self.producer = None + self._outbound_buffer = deque() + self.serializer = serializer or self.app.conf.CELERY_EVENT_SERIALIZER + self.on_enabled = set() + self.on_disabled = set() + self.groups = set(groups or []) + self.tzoffset = [-time.timezone, -time.altzone] + self.clock = self.app.clock + if not connection and channel: + self.connection = channel.connection.client + self.enabled = enabled + conninfo = self.connection or self.app.connection() + self.exchange = get_exchange(conninfo) + if conninfo.transport.driver_type in self.DISABLED_TRANSPORTS: + self.enabled = False + if self.enabled: + self.enable() + self.headers = {'hostname': self.hostname} + self.pid = os.getpid() + self.warn_if_yajl() + + def warn_if_yajl(self): + import anyjson + if anyjson.implementation.name == 'yajl': + warnings.warn(UserWarning(W_YAJL)) + + def __enter__(self): + return self + + def __exit__(self, *exc_info): + self.close() + + def enable(self): + self.producer = Producer(self.channel or self.connection, + exchange=self.exchange, + serializer=self.serializer) + self.enabled = True + for callback in self.on_enabled: + callback() + + def disable(self): + if self.enabled: + self.enabled = False + self.close() + for callback in self.on_disabled: + callback() + + def publish(self, type, fields, producer, retry=False, + retry_policy=None, blind=False, utcoffset=utcoffset, + Event=Event): + """Publish event using a custom :class:`~kombu.Producer` + instance. + + :param type: Event type name, with group separated by dash (`-`). + :param fields: Dictionary of event fields, must be json serializable. + :param producer: :class:`~kombu.Producer` instance to use, + only the ``publish`` method will be called. + :keyword retry: Retry in the event of connection failure. + :keyword retry_policy: Dict of custom retry policy, see + :meth:`~kombu.Connection.ensure`. + :keyword blind: Don't set logical clock value (also do not forward + the internal logical clock). + :keyword Event: Event type used to create event, + defaults to :func:`Event`. + :keyword utcoffset: Function returning the current utcoffset in hours. + + """ + + with self.mutex: + clock = None if blind else self.clock.forward() + event = Event(type, hostname=self.hostname, utcoffset=utcoffset(), + pid=self.pid, clock=clock, **fields) + exchange = self.exchange + producer.publish( + event, + routing_key=type.replace('-', '.'), + exchange=exchange.name, + retry=retry, + retry_policy=retry_policy, + declare=[exchange], + serializer=self.serializer, + headers=self.headers, + ) + + def send(self, type, blind=False, **fields): + """Send event. + + :param type: Event type name, with group separated by dash (`-`). + :keyword retry: Retry in the event of connection failure. + :keyword retry_policy: Dict of custom retry policy, see + :meth:`~kombu.Connection.ensure`. + :keyword blind: Don't set logical clock value (also do not forward + the internal logical clock). + :keyword Event: Event type used to create event, + defaults to :func:`Event`. + :keyword utcoffset: Function returning the current utcoffset in hours. 
+ :keyword \*\*fields: Event fields, must be json serializable. + + """ + if self.enabled: + groups = self.groups + if groups and group_from(type) not in groups: + return + try: + self.publish(type, fields, self.producer, blind) + except Exception as exc: + if not self.buffer_while_offline: + raise + self._outbound_buffer.append((type, fields, exc)) + + def flush(self): + """Flushes the outbound buffer.""" + while self._outbound_buffer: + try: + type, fields, _ = self._outbound_buffer.popleft() + except IndexError: + return + self.send(type, **fields) + + def extend_buffer(self, other): + """Copies the outbound buffer of another instance.""" + self._outbound_buffer.extend(other._outbound_buffer) + + def close(self): + """Close the event dispatcher.""" + self.mutex.locked() and self.mutex.release() + self.producer = None + + def _get_publisher(self): + return self.producer + + def _set_publisher(self, producer): + self.producer = producer + publisher = property(_get_publisher, _set_publisher) # XXX compat + + +class EventReceiver(ConsumerMixin): + """Capture events. + + :param connection: Connection to the broker. + :keyword handlers: Event handlers. + + :attr:`handlers` is a dict of event types and their handlers, + the special handler `"*"` captures all events that doesn't have a + handler. + + """ + app = None + + def __init__(self, channel, handlers=None, routing_key='#', + node_id=None, app=None, queue_prefix='celeryev', + accept=None): + self.app = app_or_default(app or self.app) + self.channel = maybe_channel(channel) + self.handlers = {} if handlers is None else handlers + self.routing_key = routing_key + self.node_id = node_id or uuid() + self.queue_prefix = queue_prefix + self.exchange = get_exchange(self.connection or self.app.connection()) + self.queue = Queue('.'.join([self.queue_prefix, self.node_id]), + exchange=self.exchange, + routing_key=self.routing_key, + auto_delete=True, + durable=False, + queue_arguments=self._get_queue_arguments()) + self.clock = self.app.clock + self.adjust_clock = self.clock.adjust + self.forward_clock = self.clock.forward + if accept is None: + accept = set([self.app.conf.CELERY_EVENT_SERIALIZER, 'json']) + self.accept = accept + + def _get_queue_arguments(self): + conf = self.app.conf + return dictfilter({ + 'x-message-ttl': maybe_s_to_ms(conf.CELERY_EVENT_QUEUE_TTL), + 'x-expires': maybe_s_to_ms(conf.CELERY_EVENT_QUEUE_EXPIRES), + }) + + def process(self, type, event): + """Process the received event by dispatching it to the appropriate + handler.""" + handler = self.handlers.get(type) or self.handlers.get('*') + handler and handler(event) + + def get_consumers(self, Consumer, channel): + return [Consumer(queues=[self.queue], + callbacks=[self._receive], no_ack=True, + accept=self.accept)] + + def on_consume_ready(self, connection, channel, consumers, + wakeup=True, **kwargs): + if wakeup: + self.wakeup_workers(channel=channel) + + def itercapture(self, limit=None, timeout=None, wakeup=True): + return self.consume(limit=limit, timeout=timeout, wakeup=wakeup) + + def capture(self, limit=None, timeout=None, wakeup=True): + """Open up a consumer capturing events. + + This has to run in the main process, and it will never + stop unless forced via :exc:`KeyboardInterrupt` or :exc:`SystemExit`. 
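+
+        Illustrative example (a sketch; ``app`` is assumed to be a
+        :class:`~celery.Celery` instance defined elsewhere)::
+
+            def on_task_failed(event):
+                print('task failed: {0}'.format(event['uuid']))
+
+            with app.connection() as connection:
+                recv = app.events.Receiver(connection, handlers={
+                    'task-failed': on_task_failed,
+                })
+                recv.capture(limit=None, timeout=None, wakeup=True)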
+ + """ + return list(self.consume(limit=limit, timeout=timeout, wakeup=wakeup)) + + def wakeup_workers(self, channel=None): + self.app.control.broadcast('heartbeat', + connection=self.connection, + channel=channel) + + def event_from_message(self, body, localize=True, + now=time.time, tzfields=_TZGETTER, + adjust_timestamp=adjust_timestamp, + CLIENT_CLOCK_SKEW=CLIENT_CLOCK_SKEW): + type = body['type'] + if type == 'task-sent': + # clients never sync so cannot use their clock value + _c = body['clock'] = (self.clock.value or 1) + CLIENT_CLOCK_SKEW + self.adjust_clock(_c) + else: + try: + clock = body['clock'] + except KeyError: + body['clock'] = self.forward_clock() + else: + self.adjust_clock(clock) + + if localize: + try: + offset, timestamp = tzfields(body) + except KeyError: + pass + else: + body['timestamp'] = adjust_timestamp(timestamp, offset) + body['local_received'] = now() + return type, body + + def _receive(self, body, message): + self.process(*self.event_from_message(body)) + + @property + def connection(self): + return self.channel.connection.client if self.channel else None + + +class Events(object): + + def __init__(self, app=None): + self.app = app + + @cached_property + def Receiver(self): + return self.app.subclass_with_self(EventReceiver, + reverse='events.Receiver') + + @cached_property + def Dispatcher(self): + return self.app.subclass_with_self(EventDispatcher, + reverse='events.Dispatcher') + + @cached_property + def State(self): + return self.app.subclass_with_self('celery.events.state:State', + reverse='events.State') + + @contextmanager + def default_dispatcher(self, hostname=None, enabled=True, + buffer_while_offline=False): + with self.app.amqp.producer_pool.acquire(block=True) as prod: + with self.Dispatcher(prod.connection, hostname, enabled, + prod.channel, buffer_while_offline) as d: + yield d diff --git a/celery/events/cursesmon.py b/celery/events/cursesmon.py new file mode 100644 index 0000000..796565f --- /dev/null +++ b/celery/events/cursesmon.py @@ -0,0 +1,544 @@ +# -*- coding: utf-8 -*- +""" + celery.events.cursesmon + ~~~~~~~~~~~~~~~~~~~~~~~ + + Graphical monitor of Celery events using curses. + +""" +from __future__ import absolute_import, print_function + +import curses +import sys +import threading + +from datetime import datetime +from itertools import count +from textwrap import wrap +from time import time +from math import ceil + +from celery import VERSION_BANNER +from celery import states +from celery.app import app_or_default +from celery.five import items, values +from celery.utils.text import abbr, abbrtask + +__all__ = ['CursesMonitor', 'evtop'] + +BORDER_SPACING = 4 +LEFT_BORDER_OFFSET = 3 +UUID_WIDTH = 36 +STATE_WIDTH = 8 +TIMESTAMP_WIDTH = 8 +MIN_WORKER_WIDTH = 15 +MIN_TASK_WIDTH = 16 + +# this module is considered experimental +# we don't care about coverage. 
+ +STATUS_SCREEN = """\ +events: {s.event_count} tasks:{s.task_count} workers:{w_alive}/{w_all} +""" + + +class CursesMonitor(object): # pragma: no cover + keymap = {} + win = None + screen_width = None + screen_delay = 10 + selected_task = None + selected_position = 0 + selected_str = 'Selected: ' + foreground = curses.COLOR_BLACK + background = curses.COLOR_WHITE + online_str = 'Workers online: ' + help_title = 'Keys: ' + help = ('j:down k:up i:info t:traceback r:result c:revoke ^c: quit') + greet = 'celery events {0}'.format(VERSION_BANNER) + info_str = 'Info: ' + + def __init__(self, state, app, keymap=None): + self.app = app + self.keymap = keymap or self.keymap + self.state = state + default_keymap = {'J': self.move_selection_down, + 'K': self.move_selection_up, + 'C': self.revoke_selection, + 'T': self.selection_traceback, + 'R': self.selection_result, + 'I': self.selection_info, + 'L': self.selection_rate_limit} + self.keymap = dict(default_keymap, **self.keymap) + self.lock = threading.RLock() + + def format_row(self, uuid, task, worker, timestamp, state): + mx = self.display_width + + # include spacing + detail_width = mx - 1 - STATE_WIDTH - 1 - TIMESTAMP_WIDTH + uuid_space = detail_width - 1 - MIN_TASK_WIDTH - 1 - MIN_WORKER_WIDTH + + if uuid_space < UUID_WIDTH: + uuid_width = uuid_space + else: + uuid_width = UUID_WIDTH + + detail_width = detail_width - uuid_width - 1 + task_width = int(ceil(detail_width / 2.0)) + worker_width = detail_width - task_width - 1 + + uuid = abbr(uuid, uuid_width).ljust(uuid_width) + worker = abbr(worker, worker_width).ljust(worker_width) + task = abbrtask(task, task_width).ljust(task_width) + state = abbr(state, STATE_WIDTH).ljust(STATE_WIDTH) + timestamp = timestamp.ljust(TIMESTAMP_WIDTH) + + row = '{0} {1} {2} {3} {4} '.format(uuid, worker, task, + timestamp, state) + if self.screen_width is None: + self.screen_width = len(row[:mx]) + return row[:mx] + + @property + def screen_width(self): + _, mx = self.win.getmaxyx() + return mx + + @property + def screen_height(self): + my, _ = self.win.getmaxyx() + return my + + @property + def display_width(self): + _, mx = self.win.getmaxyx() + return mx - BORDER_SPACING + + @property + def display_height(self): + my, _ = self.win.getmaxyx() + return my - 10 + + @property + def limit(self): + return self.display_height + + def find_position(self): + if not self.tasks: + return 0 + for i, e in enumerate(self.tasks): + if self.selected_task == e[0]: + return i + return 0 + + def move_selection_up(self): + self.move_selection(-1) + + def move_selection_down(self): + self.move_selection(1) + + def move_selection(self, direction=1): + if not self.tasks: + return + pos = self.find_position() + try: + self.selected_task = self.tasks[pos + direction][0] + except IndexError: + self.selected_task = self.tasks[0][0] + + keyalias = {curses.KEY_DOWN: 'J', + curses.KEY_UP: 'K', + curses.KEY_ENTER: 'I'} + + def handle_keypress(self): + try: + key = self.win.getkey().upper() + except: + return + key = self.keyalias.get(key) or key + handler = self.keymap.get(key) + if handler is not None: + handler() + + def alert(self, callback, title=None): + self.win.erase() + my, mx = self.win.getmaxyx() + y = blank_line = count(2) + if title: + self.win.addstr(next(y), 3, title, + curses.A_BOLD | curses.A_UNDERLINE) + next(blank_line) + callback(my, mx, next(y)) + self.win.addstr(my - 1, 0, 'Press any key to continue...', + curses.A_BOLD) + self.win.refresh() + while 1: + try: + return self.win.getkey().upper() + except: + pass + + def 
selection_rate_limit(self): + if not self.selected_task: + return curses.beep() + task = self.state.tasks[self.selected_task] + if not task.name: + return curses.beep() + + my, mx = self.win.getmaxyx() + r = 'New rate limit: ' + self.win.addstr(my - 2, 3, r, curses.A_BOLD | curses.A_UNDERLINE) + self.win.addstr(my - 2, len(r) + 3, ' ' * (mx - len(r))) + rlimit = self.readline(my - 2, 3 + len(r)) + + if rlimit: + reply = self.app.control.rate_limit(task.name, + rlimit.strip(), reply=True) + self.alert_remote_control_reply(reply) + + def alert_remote_control_reply(self, reply): + + def callback(my, mx, xs): + y = count(xs) + if not reply: + self.win.addstr( + next(y), 3, 'No replies received in 1s deadline.', + curses.A_BOLD + curses.color_pair(2), + ) + return + + for subreply in reply: + curline = next(y) + + host, response = next(items(subreply)) + host = '{0}: '.format(host) + self.win.addstr(curline, 3, host, curses.A_BOLD) + attr = curses.A_NORMAL + text = '' + if 'error' in response: + text = response['error'] + attr |= curses.color_pair(2) + elif 'ok' in response: + text = response['ok'] + attr |= curses.color_pair(3) + self.win.addstr(curline, 3 + len(host), text, attr) + + return self.alert(callback, 'Remote Control Command Replies') + + def readline(self, x, y): + buffer = str() + curses.echo() + try: + i = 0 + while 1: + ch = self.win.getch(x, y + i) + if ch != -1: + if ch in (10, curses.KEY_ENTER): # enter + break + if ch in (27, ): + buffer = str() + break + buffer += chr(ch) + i += 1 + finally: + curses.noecho() + return buffer + + def revoke_selection(self): + if not self.selected_task: + return curses.beep() + reply = self.app.control.revoke(self.selected_task, reply=True) + self.alert_remote_control_reply(reply) + + def selection_info(self): + if not self.selected_task: + return + + def alert_callback(mx, my, xs): + my, mx = self.win.getmaxyx() + y = count(xs) + task = self.state.tasks[self.selected_task] + info = task.info(extra=['state']) + infoitems = [ + ('args', info.pop('args', None)), + ('kwargs', info.pop('kwargs', None)) + ] + list(info.items()) + for key, value in infoitems: + if key is None: + continue + value = str(value) + curline = next(y) + keys = key + ': ' + self.win.addstr(curline, 3, keys, curses.A_BOLD) + wrapped = wrap(value, mx - 2) + if len(wrapped) == 1: + self.win.addstr( + curline, len(keys) + 3, + abbr(wrapped[0], + self.screen_width - (len(keys) + 3))) + else: + for subline in wrapped: + nexty = next(y) + if nexty >= my - 1: + subline = ' ' * 4 + '[...]' + elif nexty >= my: + break + self.win.addstr( + nexty, 3, + abbr(' ' * 4 + subline, self.screen_width - 4), + curses.A_NORMAL, + ) + + return self.alert( + alert_callback, 'Task details for {0.selected_task}'.format(self), + ) + + def selection_traceback(self): + if not self.selected_task: + return curses.beep() + task = self.state.tasks[self.selected_task] + if task.state not in states.EXCEPTION_STATES: + return curses.beep() + + def alert_callback(my, mx, xs): + y = count(xs) + for line in task.traceback.split('\n'): + self.win.addstr(next(y), 3, line) + + return self.alert( + alert_callback, + 'Task Exception Traceback for {0.selected_task}'.format(self), + ) + + def selection_result(self): + if not self.selected_task: + return + + def alert_callback(my, mx, xs): + y = count(xs) + task = self.state.tasks[self.selected_task] + result = (getattr(task, 'result', None) + or getattr(task, 'exception', None)) + for line in wrap(result, mx - 2): + self.win.addstr(next(y), 3, line) + + return 
self.alert( + alert_callback, + 'Task Result for {0.selected_task}'.format(self), + ) + + def display_task_row(self, lineno, task): + state_color = self.state_colors.get(task.state) + attr = curses.A_NORMAL + if task.uuid == self.selected_task: + attr = curses.A_STANDOUT + timestamp = datetime.utcfromtimestamp( + task.timestamp or time(), + ) + timef = timestamp.strftime('%H:%M:%S') + hostname = task.worker.hostname if task.worker else '*NONE*' + line = self.format_row(task.uuid, task.name, + hostname, + timef, task.state) + self.win.addstr(lineno, LEFT_BORDER_OFFSET, line, attr) + + if state_color: + self.win.addstr(lineno, + len(line) - STATE_WIDTH + BORDER_SPACING - 1, + task.state, state_color | attr) + + def draw(self): + with self.lock: + win = self.win + self.handle_keypress() + x = LEFT_BORDER_OFFSET + y = blank_line = count(2) + my, mx = win.getmaxyx() + win.erase() + win.bkgd(' ', curses.color_pair(1)) + win.border() + win.addstr(1, x, self.greet, curses.A_DIM | curses.color_pair(5)) + next(blank_line) + win.addstr(next(y), x, self.format_row('UUID', 'TASK', + 'WORKER', 'TIME', 'STATE'), + curses.A_BOLD | curses.A_UNDERLINE) + tasks = self.tasks + if tasks: + for row, (uuid, task) in enumerate(tasks): + if row > self.display_height: + break + + if task.uuid: + lineno = next(y) + self.display_task_row(lineno, task) + + # -- Footer + next(blank_line) + win.hline(my - 6, x, curses.ACS_HLINE, self.screen_width - 4) + + # Selected Task Info + if self.selected_task: + win.addstr(my - 5, x, self.selected_str, curses.A_BOLD) + info = 'Missing extended info' + detail = '' + try: + selection = self.state.tasks[self.selected_task] + except KeyError: + pass + else: + info = selection.info() + if 'runtime' in info: + info['runtime'] = '{0:.2f}'.format(info['runtime']) + if 'result' in info: + info['result'] = abbr(info['result'], 16) + info = ' '.join( + '{0}={1}'.format(key, value) + for key, value in items(info) + ) + detail = '... 
-> key i' + infowin = abbr(info, + self.screen_width - len(self.selected_str) - 2, + detail) + win.addstr(my - 5, x + len(self.selected_str), infowin) + # Make ellipsis bold + if detail in infowin: + detailpos = len(infowin) - len(detail) + win.addstr(my - 5, x + len(self.selected_str) + detailpos, + detail, curses.A_BOLD) + else: + win.addstr(my - 5, x, 'No task selected', curses.A_NORMAL) + + # Workers + if self.workers: + win.addstr(my - 4, x, self.online_str, curses.A_BOLD) + win.addstr(my - 4, x + len(self.online_str), + ', '.join(sorted(self.workers)), curses.A_NORMAL) + else: + win.addstr(my - 4, x, 'No workers discovered.') + + # Info + win.addstr(my - 3, x, self.info_str, curses.A_BOLD) + win.addstr( + my - 3, x + len(self.info_str), + STATUS_SCREEN.format( + s=self.state, + w_alive=len([w for w in values(self.state.workers) + if w.alive]), + w_all=len(self.state.workers), + ), + curses.A_DIM, + ) + + # Help + self.safe_add_str(my - 2, x, self.help_title, curses.A_BOLD) + self.safe_add_str(my - 2, x + len(self.help_title), self.help, + curses.A_DIM) + win.refresh() + + def safe_add_str(self, y, x, string, *args, **kwargs): + if x + len(string) > self.screen_width: + string = string[:self.screen_width - x] + self.win.addstr(y, x, string, *args, **kwargs) + + def init_screen(self): + with self.lock: + self.win = curses.initscr() + self.win.nodelay(True) + self.win.keypad(True) + curses.start_color() + curses.init_pair(1, self.foreground, self.background) + # exception states + curses.init_pair(2, curses.COLOR_RED, self.background) + # successful state + curses.init_pair(3, curses.COLOR_GREEN, self.background) + # revoked state + curses.init_pair(4, curses.COLOR_MAGENTA, self.background) + # greeting + curses.init_pair(5, curses.COLOR_BLUE, self.background) + # started state + curses.init_pair(6, curses.COLOR_YELLOW, self.foreground) + + self.state_colors = {states.SUCCESS: curses.color_pair(3), + states.REVOKED: curses.color_pair(4), + states.STARTED: curses.color_pair(6)} + for state in states.EXCEPTION_STATES: + self.state_colors[state] = curses.color_pair(2) + + curses.cbreak() + + def resetscreen(self): + with self.lock: + curses.nocbreak() + self.win.keypad(False) + curses.echo() + curses.endwin() + + def nap(self): + curses.napms(self.screen_delay) + + @property + def tasks(self): + return list(self.state.tasks_by_time(limit=self.limit)) + + @property + def workers(self): + return [hostname for hostname, w in items(self.state.workers) + if w.alive] + + +class DisplayThread(threading.Thread): # pragma: no cover + + def __init__(self, display): + self.display = display + self.shutdown = False + threading.Thread.__init__(self) + + def run(self): + while not self.shutdown: + self.display.draw() + self.display.nap() + + +def capture_events(app, state, display): # pragma: no cover + + def on_connection_error(exc, interval): + print('Connection Error: {0!r}. 
Retry in {1}s.'.format( + exc, interval), file=sys.stderr) + + while 1: + print('-> evtop: starting capture...', file=sys.stderr) + with app.connection() as conn: + try: + conn.ensure_connection(on_connection_error, + app.conf.BROKER_CONNECTION_MAX_RETRIES) + recv = app.events.Receiver(conn, handlers={'*': state.event}) + display.resetscreen() + display.init_screen() + recv.capture() + except conn.connection_errors + conn.channel_errors as exc: + print('Connection lost: {0!r}'.format(exc), file=sys.stderr) + + +def evtop(app=None): # pragma: no cover + app = app_or_default(app) + state = app.events.State() + display = CursesMonitor(state, app) + display.init_screen() + refresher = DisplayThread(display) + refresher.start() + try: + capture_events(app, state, display) + except Exception: + refresher.shutdown = True + refresher.join() + display.resetscreen() + raise + except (KeyboardInterrupt, SystemExit): + refresher.shutdown = True + refresher.join() + display.resetscreen() + + +if __name__ == '__main__': # pragma: no cover + evtop() diff --git a/celery/events/dumper.py b/celery/events/dumper.py new file mode 100644 index 0000000..323afc4 --- /dev/null +++ b/celery/events/dumper.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +""" + celery.events.dumper + ~~~~~~~~~~~~~~~~~~~~ + + This is a simple program that dumps events to the console + as they happen. Think of it like a `tcpdump` for Celery events. + +""" +from __future__ import absolute_import, print_function + +import sys + +from datetime import datetime + +from celery.app import app_or_default +from celery.utils.functional import LRUCache +from celery.utils.timeutils import humanize_seconds + +__all__ = ['Dumper', 'evdump'] + +TASK_NAMES = LRUCache(limit=0xFFF) + +HUMAN_TYPES = {'worker-offline': 'shutdown', + 'worker-online': 'started', + 'worker-heartbeat': 'heartbeat'} + +CONNECTION_ERROR = """\ +-> Cannot connect to %s: %s. +Trying again %s +""" + + +def humanize_type(type): + try: + return HUMAN_TYPES[type.lower()] + except KeyError: + return type.lower().replace('-', ' ') + + +class Dumper(object): + + def __init__(self, out=sys.stdout): + self.out = out + + def say(self, msg): + print(msg, file=self.out) + # need to flush so that output can be piped. 
+ try: + self.out.flush() + except AttributeError: + pass + + def on_event(self, ev): + timestamp = datetime.utcfromtimestamp(ev.pop('timestamp')) + type = ev.pop('type').lower() + hostname = ev.pop('hostname') + if type.startswith('task-'): + uuid = ev.pop('uuid') + if type in ('task-received', 'task-sent'): + task = TASK_NAMES[uuid] = '{0}({1}) args={2} kwargs={3}' \ + .format(ev.pop('name'), uuid, + ev.pop('args'), + ev.pop('kwargs')) + else: + task = TASK_NAMES.get(uuid, '') + return self.format_task_event(hostname, timestamp, + type, task, ev) + fields = ', '.join( + '{0}={1}'.format(key, ev[key]) for key in sorted(ev) + ) + sep = fields and ':' or '' + self.say('{0} [{1}] {2}{3} {4}'.format( + hostname, timestamp, humanize_type(type), sep, fields), + ) + + def format_task_event(self, hostname, timestamp, type, task, event): + fields = ', '.join( + '{0}={1}'.format(key, event[key]) for key in sorted(event) + ) + sep = fields and ':' or '' + self.say('{0} [{1}] {2}{3} {4} {5}'.format( + hostname, timestamp, humanize_type(type), sep, task, fields), + ) + + +def evdump(app=None, out=sys.stdout): + app = app_or_default(app) + dumper = Dumper(out=out) + dumper.say('-> evdump: starting capture...') + conn = app.connection().clone() + + def _error_handler(exc, interval): + dumper.say(CONNECTION_ERROR % ( + conn.as_uri(), exc, humanize_seconds(interval, 'in', ' ') + )) + + while 1: + try: + conn.ensure_connection(_error_handler) + recv = app.events.Receiver(conn, handlers={'*': dumper.on_event}) + recv.capture() + except (KeyboardInterrupt, SystemExit): + return conn and conn.close() + except conn.connection_errors + conn.channel_errors: + dumper.say('-> Connection lost, attempting reconnect') + +if __name__ == '__main__': # pragma: no cover + evdump() diff --git a/celery/events/snapshot.py b/celery/events/snapshot.py new file mode 100644 index 0000000..0dd4155 --- /dev/null +++ b/celery/events/snapshot.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +""" + celery.events.snapshot + ~~~~~~~~~~~~~~~~~~~~~~ + + Consuming the events as a stream is not always suitable + so this module implements a system to take snapshots of the + state of a cluster at regular intervals. There is a full + implementation of this writing the snapshots to a database + in :mod:`djcelery.snapshots` in the `django-celery` distribution. 
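+
+    Illustrative example of a custom camera (a sketch; the class name
+    and the printed message are assumptions)::
+
+        from celery.events.snapshot import Polaroid
+
+        class Camera(Polaroid):
+            clear_after = True  # clear state between snapshots
+
+            def on_shutter(self, state):
+                alive = len(state.alive_workers())
+                print('Workers alive: {0}'.format(alive))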
+ +""" +from __future__ import absolute_import + +from kombu.utils.limits import TokenBucket + +from celery import platforms +from celery.app import app_or_default +from celery.utils.timer2 import Timer +from celery.utils.dispatch import Signal +from celery.utils.imports import instantiate +from celery.utils.log import get_logger +from celery.utils.timeutils import rate + +__all__ = ['Polaroid', 'evcam'] + +logger = get_logger('celery.evcam') + + +class Polaroid(object): + timer = None + shutter_signal = Signal(providing_args=('state', )) + cleanup_signal = Signal() + clear_after = False + + _tref = None + _ctref = None + + def __init__(self, state, freq=1.0, maxrate=None, + cleanup_freq=3600.0, timer=None, app=None): + self.app = app_or_default(app) + self.state = state + self.freq = freq + self.cleanup_freq = cleanup_freq + self.timer = timer or self.timer or Timer() + self.logger = logger + self.maxrate = maxrate and TokenBucket(rate(maxrate)) + + def install(self): + self._tref = self.timer.call_repeatedly(self.freq, self.capture) + self._ctref = self.timer.call_repeatedly( + self.cleanup_freq, self.cleanup, + ) + + def on_shutter(self, state): + pass + + def on_cleanup(self): + pass + + def cleanup(self): + logger.debug('Cleanup: Running...') + self.cleanup_signal.send(None) + self.on_cleanup() + + def shutter(self): + if self.maxrate is None or self.maxrate.can_consume(): + logger.debug('Shutter: %s', self.state) + self.shutter_signal.send(self.state) + self.on_shutter(self.state) + + def capture(self): + self.state.freeze_while(self.shutter, clear_after=self.clear_after) + + def cancel(self): + if self._tref: + self._tref() # flush all received events. + self._tref.cancel() + if self._ctref: + self._ctref.cancel() + + def __enter__(self): + self.install() + return self + + def __exit__(self, *exc_info): + self.cancel() + + +def evcam(camera, freq=1.0, maxrate=None, loglevel=0, + logfile=None, pidfile=None, timer=None, app=None): + app = app_or_default(app) + + if pidfile: + platforms.create_pidlock(pidfile) + + app.log.setup_logging_subsystem(loglevel, logfile) + + print('-> evcam: Taking snapshots with {0} (every {1} secs.)'.format( + camera, freq)) + state = app.events.State() + cam = instantiate(camera, state, app=app, freq=freq, + maxrate=maxrate, timer=timer) + cam.install() + conn = app.connection() + recv = app.events.Receiver(conn, handlers={'*': state.event}) + try: + try: + recv.capture(limit=None) + except KeyboardInterrupt: + raise SystemExit + finally: + cam.cancel() + conn.close() diff --git a/celery/events/state.py b/celery/events/state.py new file mode 100644 index 0000000..c78f2d0 --- /dev/null +++ b/celery/events/state.py @@ -0,0 +1,656 @@ +# -*- coding: utf-8 -*- +""" + celery.events.state + ~~~~~~~~~~~~~~~~~~~ + + This module implements a datastructure used to keep + track of the state of a cluster of workers and the tasks + it is working on (by consuming events). + + For every event consumed the state is updated, + so the state represents the state of the cluster + at the time of the last event. + + Snapshots (:mod:`celery.events.snapshot`) can be used to + take "pictures" of this state at regular intervals + to e.g. store that in a database. 
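+
+    Illustrative example (a sketch; ``app`` is assumed to be a
+    :class:`~celery.Celery` instance)::
+
+        state = app.events.State()
+        with app.connection() as connection:
+            recv = app.events.Receiver(
+                connection, handlers={'*': state.event})
+            recv.capture(limit=100)
+        print(state.task_types())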
+ +""" +from __future__ import absolute_import + +import bisect +import sys +import threading + +from datetime import datetime +from decimal import Decimal +from itertools import islice +from operator import itemgetter +from time import time +from weakref import ref + +from kombu.clocks import timetuple +from kombu.utils import cached_property, kwdict + +from celery import states +from celery.five import class_property, items, values +from celery.utils import deprecated +from celery.utils.functional import LRUCache, memoize +from celery.utils.log import get_logger + +PYPY = hasattr(sys, 'pypy_version_info') + +# The window (in percentage) is added to the workers heartbeat +# frequency. If the time between updates exceeds this window, +# then the worker is considered to be offline. +HEARTBEAT_EXPIRE_WINDOW = 200 + +# Max drift between event timestamp and time of event received +# before we alert that clocks may be unsynchronized. +HEARTBEAT_DRIFT_MAX = 16 + +DRIFT_WARNING = """\ +Substantial drift from %s may mean clocks are out of sync. Current drift is +%s seconds. [orig: %s recv: %s] +""" + +CAN_KWDICT = sys.version_info >= (2, 6, 5) + +logger = get_logger(__name__) +warn = logger.warning + +R_STATE = '' +R_WORKER = ' HEARTBEAT_DRIFT_MAX: + _warn_drift(self.hostname, drift, + local_received, timestamp) + if local_received: + hearts = len(heartbeats) + if hearts > hbmax - 1: + hb_pop(0) + if hearts and local_received > heartbeats[-1]: + hb_append(local_received) + else: + insort(heartbeats, local_received) + return event + + def update(self, f, **kw): + for k, v in items(dict(f, **kw) if kw else f): + setattr(self, k, v) + + def __repr__(self): + return R_WORKER.format(self) + + @property + def status_string(self): + return 'ONLINE' if self.alive else 'OFFLINE' + + @property + def heartbeat_expires(self): + return heartbeat_expires(self.heartbeats[-1], + self.freq, self.expire_window) + + @property + def alive(self, nowfun=time): + return bool(self.heartbeats and nowfun() < self.heartbeat_expires) + + @property + def id(self): + return '{0.hostname}.{0.pid}'.format(self) + + @deprecated(3.2, 3.3) + def update_heartbeat(self, received, timestamp): + self.event(None, timestamp, received) + + @deprecated(3.2, 3.3) + def on_online(self, timestamp=None, local_received=None, **fields): + self.event('online', timestamp, local_received, fields) + + @deprecated(3.2, 3.3) + def on_offline(self, timestamp=None, local_received=None, **fields): + self.event('offline', timestamp, local_received, fields) + + @deprecated(3.2, 3.3) + def on_heartbeat(self, timestamp=None, local_received=None, **fields): + self.event('heartbeat', timestamp, local_received, fields) + + @class_property + def _defaults(cls): + """Deprecated, to be removed in 3.3""" + source = cls() + return dict((k, getattr(source, k)) for k in cls._fields) + + +@with_unique_field('uuid') +class Task(object): + """Task State.""" + name = received = sent = started = succeeded = failed = retried = \ + revoked = args = kwargs = eta = expires = retries = worker = result = \ + exception = timestamp = runtime = traceback = exchange = \ + routing_key = client = None + state = states.PENDING + clock = 0 + + _fields = ('uuid', 'name', 'state', 'received', 'sent', 'started', + 'succeeded', 'failed', 'retried', 'revoked', 'args', 'kwargs', + 'eta', 'expires', 'retries', 'worker', 'result', 'exception', + 'timestamp', 'runtime', 'traceback', 'exchange', 'routing_key', + 'clock', 'client') + if not PYPY: + __slots__ = ('__dict__', '__weakref__') + + #: 
How to merge out of order events. + #: Disorder is detected by logical ordering (e.g. :event:`task-received` + #: must have happened before a :event:`task-failed` event). + #: + #: A merge rule consists of a state and a list of fields to keep from + #: that state. ``(RECEIVED, ('name', 'args')``, means the name and args + #: fields are always taken from the RECEIVED state, and any values for + #: these fields received before or after is simply ignored. + merge_rules = {states.RECEIVED: ('name', 'args', 'kwargs', + 'retries', 'eta', 'expires')} + + #: meth:`info` displays these fields by default. + _info_fields = ('args', 'kwargs', 'retries', 'result', 'eta', 'runtime', + 'expires', 'exception', 'exchange', 'routing_key') + + def __init__(self, uuid=None, **kwargs): + self.uuid = uuid + if kwargs: + for k, v in items(kwargs): + setattr(self, k, v) + + def event(self, type_, timestamp=None, local_received=None, fields=None, + precedence=states.precedence, items=items, dict=dict, + PENDING=states.PENDING, RECEIVED=states.RECEIVED, + STARTED=states.STARTED, FAILURE=states.FAILURE, + RETRY=states.RETRY, SUCCESS=states.SUCCESS, + REVOKED=states.REVOKED): + fields = fields or {} + if type_ == 'sent': + state, self.sent = PENDING, timestamp + elif type_ == 'received': + state, self.received = RECEIVED, timestamp + elif type_ == 'started': + state, self.started = STARTED, timestamp + elif type_ == 'failed': + state, self.failed = FAILURE, timestamp + elif type_ == 'retried': + state, self.retried = RETRY, timestamp + elif type_ == 'succeeded': + state, self.succeeded = SUCCESS, timestamp + elif type_ == 'revoked': + state, self.revoked = REVOKED, timestamp + else: + state = type_.upper() + + # note that precedence here is reversed + # see implementation in celery.states.state.__lt__ + if state != RETRY and self.state != RETRY and \ + precedence(state) > precedence(self.state): + # this state logically happens-before the current state, so merge. 
+ keep = self.merge_rules.get(state) + if keep is not None: + fields = dict( + (k, v) for k, v in items(fields) if k in keep + ) + for key, value in items(fields): + setattr(self, key, value) + else: + self.state = state + self.timestamp = timestamp + for key, value in items(fields): + setattr(self, key, value) + + def info(self, fields=None, extra=[]): + """Information about this task suitable for on-screen display.""" + fields = self._info_fields if fields is None else fields + + def _keys(): + for key in list(fields) + list(extra): + value = getattr(self, key, None) + if value is not None: + yield key, value + + return dict(_keys()) + + def __repr__(self): + return R_TASK.format(self) + + def as_dict(self): + get = object.__getattribute__ + return dict( + (k, get(self, k)) for k in self._fields + ) + + def __reduce__(self): + return _depickle_task, (self.__class__, self.as_dict()) + + @property + def origin(self): + return self.client if self.worker is None else self.worker.id + + @property + def ready(self): + return self.state in states.READY_STATES + + @deprecated(3.2, 3.3) + def on_sent(self, timestamp=None, **fields): + self.event('sent', timestamp, fields) + + @deprecated(3.2, 3.3) + def on_received(self, timestamp=None, **fields): + self.event('received', timestamp, fields) + + @deprecated(3.2, 3.3) + def on_started(self, timestamp=None, **fields): + self.event('started', timestamp, fields) + + @deprecated(3.2, 3.3) + def on_failed(self, timestamp=None, **fields): + self.event('failed', timestamp, fields) + + @deprecated(3.2, 3.3) + def on_retried(self, timestamp=None, **fields): + self.event('retried', timestamp, fields) + + @deprecated(3.2, 3.3) + def on_succeeded(self, timestamp=None, **fields): + self.event('succeeded', timestamp, fields) + + @deprecated(3.2, 3.3) + def on_revoked(self, timestamp=None, **fields): + self.event('revoked', timestamp, fields) + + @deprecated(3.2, 3.3) + def on_unknown_event(self, shortype, timestamp=None, **fields): + self.event(shortype, timestamp, fields) + + @deprecated(3.2, 3.3) + def update(self, state, timestamp, fields, + _state=states.state, RETRY=states.RETRY): + return self.event(state, timestamp, None, fields) + + @deprecated(3.2, 3.3) + def merge(self, state, timestamp, fields): + keep = self.merge_rules.get(state) + if keep is not None: + fields = dict((k, v) for k, v in items(fields) if k in keep) + for key, value in items(fields): + setattr(self, key, value) + + @class_property + def _defaults(cls): + """Deprecated, to be removed in 3.3.""" + source = cls() + return dict((k, getattr(source, k)) for k in source._fields) + + +class State(object): + """Records clusters state.""" + Worker = Worker + Task = Task + event_count = 0 + task_count = 0 + heap_multiplier = 4 + + def __init__(self, callback=None, + workers=None, tasks=None, taskheap=None, + max_workers_in_memory=5000, max_tasks_in_memory=10000, + on_node_join=None, on_node_leave=None): + self.event_callback = callback + self.workers = (LRUCache(max_workers_in_memory) + if workers is None else workers) + self.tasks = (LRUCache(max_tasks_in_memory) + if tasks is None else tasks) + self._taskheap = [] if taskheap is None else taskheap + self.max_workers_in_memory = max_workers_in_memory + self.max_tasks_in_memory = max_tasks_in_memory + self.on_node_join = on_node_join + self.on_node_leave = on_node_leave + self._mutex = threading.Lock() + self.handlers = {} + self._seen_types = set() + self.rebuild_taskheap() + + @cached_property + def _event(self): + return 
self._create_dispatcher() + + def freeze_while(self, fun, *args, **kwargs): + clear_after = kwargs.pop('clear_after', False) + with self._mutex: + try: + return fun(*args, **kwargs) + finally: + if clear_after: + self._clear() + + def clear_tasks(self, ready=True): + with self._mutex: + return self._clear_tasks(ready) + + def _clear_tasks(self, ready=True): + if ready: + in_progress = dict( + (uuid, task) for uuid, task in self.itertasks() + if task.state not in states.READY_STATES) + self.tasks.clear() + self.tasks.update(in_progress) + else: + self.tasks.clear() + self._taskheap[:] = [] + + def _clear(self, ready=True): + self.workers.clear() + self._clear_tasks(ready) + self.event_count = 0 + self.task_count = 0 + + def clear(self, ready=True): + with self._mutex: + return self._clear(ready) + + def get_or_create_worker(self, hostname, **kwargs): + """Get or create worker by hostname. + + Return tuple of ``(worker, was_created)``. + """ + try: + worker = self.workers[hostname] + if kwargs: + worker.update(kwargs) + return worker, False + except KeyError: + worker = self.workers[hostname] = self.Worker( + hostname, **kwargs) + return worker, True + + def get_or_create_task(self, uuid): + """Get or create task by uuid.""" + try: + return self.tasks[uuid], False + except KeyError: + task = self.tasks[uuid] = self.Task(uuid) + return task, True + + def event(self, event): + with self._mutex: + return self._event(event) + + def task_event(self, type_, fields): + """Deprecated, use :meth:`event`.""" + return self._event(dict(fields, type='-'.join(['task', type_])))[0] + + def worker_event(self, type_, fields): + """Deprecated, use :meth:`event`.""" + return self._event(dict(fields, type='-'.join(['worker', type_])))[0] + + def _create_dispatcher(self): + get_handler = self.handlers.__getitem__ + event_callback = self.event_callback + wfields = itemgetter('hostname', 'timestamp', 'local_received') + tfields = itemgetter('uuid', 'hostname', 'timestamp', + 'local_received', 'clock') + taskheap = self._taskheap + th_append = taskheap.append + th_pop = taskheap.pop + # Removing events from task heap is an O(n) operation, + # so easier to just account for the common number of events + # for each task (PENDING->RECEIVED->STARTED->final) + #: an O(n) operation + max_events_in_heap = self.max_tasks_in_memory * self.heap_multiplier + add_type = self._seen_types.add + on_node_join, on_node_leave = self.on_node_join, self.on_node_leave + tasks, Task = self.tasks, self.Task + workers, Worker = self.workers, self.Worker + # avoid updating LRU entry at getitem + get_worker, get_task = workers.data.__getitem__, tasks.data.__getitem__ + + def _event(event, + timetuple=timetuple, KeyError=KeyError, + insort=bisect.insort, created=True): + self.event_count += 1 + if event_callback: + event_callback(self, event) + group, _, subject = event['type'].partition('-') + try: + handler = get_handler(group) + except KeyError: + pass + else: + return handler(subject, event), subject + + if group == 'worker': + try: + hostname, timestamp, local_received = wfields(event) + except KeyError: + pass + else: + is_offline = subject == 'offline' + try: + worker, created = get_worker(hostname), False + except KeyError: + if is_offline: + worker, created = Worker(hostname), False + else: + worker = workers[hostname] = Worker(hostname) + worker.event(subject, timestamp, local_received, event) + if on_node_join and (created or subject == 'online'): + on_node_join(worker) + if on_node_leave and is_offline: + on_node_leave(worker) + 
workers.pop(hostname, None) + return (worker, created), subject + elif group == 'task': + (uuid, hostname, timestamp, + local_received, clock) = tfields(event) + # task-sent event is sent by client, not worker + is_client_event = subject == 'sent' + try: + task, created = get_task(uuid), False + except KeyError: + task = tasks[uuid] = Task(uuid) + if is_client_event: + task.client = hostname + else: + try: + worker, created = get_worker(hostname), False + except KeyError: + worker = workers[hostname] = Worker(hostname) + task.worker = worker + if worker is not None and local_received: + worker.event(None, local_received, timestamp) + + origin = hostname if is_client_event else worker.id + + # remove oldest event if exceeding the limit. + heaps = len(taskheap) + if heaps + 1 > max_events_in_heap: + th_pop(0) + + # most events will be dated later than the previous. + timetup = timetuple(clock, timestamp, origin, ref(task)) + if heaps and timetup > taskheap[-1]: + th_append(timetup) + else: + insort(taskheap, timetup) + + if subject == 'received': + self.task_count += 1 + task.event(subject, timestamp, local_received, event) + task_name = task.name + if task_name is not None: + add_type(task_name) + return (task, created), subject + return _event + + def rebuild_taskheap(self, timetuple=timetuple): + heap = self._taskheap[:] = [ + timetuple(t.clock, t.timestamp, t.origin, ref(t)) + for t in values(self.tasks) + ] + heap.sort() + + def itertasks(self, limit=None): + for index, row in enumerate(items(self.tasks)): + yield row + if limit and index + 1 >= limit: + break + + def tasks_by_time(self, limit=None): + """Generator giving tasks ordered by time, + in ``(uuid, Task)`` tuples.""" + seen = set() + for evtup in islice(reversed(self._taskheap), 0, limit): + task = evtup[3]() + if task is not None: + uuid = task.uuid + if uuid not in seen: + yield uuid, task + seen.add(uuid) + tasks_by_timestamp = tasks_by_time + + def tasks_by_type(self, name, limit=None): + """Get all tasks by type. + + Return a list of ``(uuid, Task)`` tuples. + + """ + return islice( + ((uuid, task) for uuid, task in self.tasks_by_time() + if task.name == name), + 0, limit, + ) + + def tasks_by_worker(self, hostname, limit=None): + """Get all tasks by worker. + + """ + return islice( + ((uuid, task) for uuid, task in self.tasks_by_time() + if task.worker.hostname == hostname), + 0, limit, + ) + + def task_types(self): + """Return a list of all seen task types.""" + return sorted(self._seen_types) + + def alive_workers(self): + """Return a list of (seemingly) alive workers.""" + return [w for w in values(self.workers) if w.alive] + + def __repr__(self): + return R_STATE.format(self) + + def __reduce__(self): + return self.__class__, ( + self.event_callback, self.workers, self.tasks, None, + self.max_workers_in_memory, self.max_tasks_in_memory, + self.on_node_join, self.on_node_leave, + ) diff --git a/celery/exceptions.py b/celery/exceptions.py new file mode 100644 index 0000000..ab65019 --- /dev/null +++ b/celery/exceptions.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +""" + celery.exceptions + ~~~~~~~~~~~~~~~~~ + + This module contains all exceptions used by the Celery API. 
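+
+    Illustrative example (a sketch; ``app``, ``download`` and
+    ``TemporaryFailure`` are assumed names, not part of this module)::
+
+        from celery.exceptions import Reject
+
+        @app.task(bind=True)
+        def fetch(self, url):
+            try:
+                return download(url)
+            except TemporaryFailure as exc:
+                raise Reject(exc, requeue=True)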
+ +""" +from __future__ import absolute_import + +import numbers + +from .five import string_t + +from billiard.exceptions import ( # noqa + SoftTimeLimitExceeded, TimeLimitExceeded, WorkerLostError, Terminated, +) + +__all__ = ['SecurityError', 'Ignore', 'QueueNotFound', + 'WorkerShutdown', 'WorkerTerminate', + 'ImproperlyConfigured', 'NotRegistered', 'AlreadyRegistered', + 'TimeoutError', 'MaxRetriesExceededError', 'Retry', + 'TaskRevokedError', 'NotConfigured', 'AlwaysEagerIgnored', + 'InvalidTaskError', 'ChordError', 'CPendingDeprecationWarning', + 'CDeprecationWarning', 'FixupWarning', 'DuplicateNodenameWarning', + 'SoftTimeLimitExceeded', 'TimeLimitExceeded', 'WorkerLostError', + 'Terminated'] + +UNREGISTERED_FMT = """\ +Task of kind {0} is not registered, please make sure it's imported.\ +""" + + +class SecurityError(Exception): + """Security related exceptions. + + Handle with care. + + """ + + +class Ignore(Exception): + """A task can raise this to ignore doing state updates.""" + + +class Reject(Exception): + """A task can raise this if it wants to reject/requeue the message.""" + + def __init__(self, reason=None, requeue=False): + self.reason = reason + self.requeue = requeue + super(Reject, self).__init__(reason, requeue) + + def __repr__(self): + return 'reject requeue=%s: %s' % (self.requeue, self.reason) + + +class WorkerTerminate(SystemExit): + """Signals that the worker should terminate immediately.""" +SystemTerminate = WorkerTerminate # XXX compat + + +class WorkerShutdown(SystemExit): + """Signals that the worker should perform a warm shutdown.""" + + +class QueueNotFound(KeyError): + """Task routed to a queue not in CELERY_QUEUES.""" + + +class ImproperlyConfigured(ImportError): + """Celery is somehow improperly configured.""" + + +class NotRegistered(KeyError): + """The task is not registered.""" + + def __repr__(self): + return UNREGISTERED_FMT.format(self) + + +class AlreadyRegistered(Exception): + """The task is already registered.""" + + +class TimeoutError(Exception): + """The operation timed out.""" + + +class MaxRetriesExceededError(Exception): + """The tasks max restart limit has been exceeded.""" + + +class Retry(Exception): + """The task is to be retried later.""" + + #: Optional message describing context of retry. + message = None + + #: Exception (if any) that caused the retry to happen. + exc = None + + #: Time of retry (ETA), either :class:`numbers.Real` or + #: :class:`~datetime.datetime`. 
+ when = None + + def __init__(self, message=None, exc=None, when=None, **kwargs): + from kombu.utils.encoding import safe_repr + self.message = message + if isinstance(exc, string_t): + self.exc, self.excs = None, exc + else: + self.exc, self.excs = exc, safe_repr(exc) if exc else None + self.when = when + Exception.__init__(self, exc, when, **kwargs) + + def humanize(self): + if isinstance(self.when, numbers.Real): + return 'in {0.when}s'.format(self) + return 'at {0.when}'.format(self) + + def __str__(self): + if self.message: + return self.message + if self.excs: + return 'Retry {0}: {1}'.format(self.humanize(), self.excs) + return 'Retry {0}'.format(self.humanize()) + + def __reduce__(self): + return self.__class__, (self.message, self.excs, self.when) +RetryTaskError = Retry # XXX compat + + +class TaskRevokedError(Exception): + """The task has been revoked, so no result available.""" + + +class NotConfigured(UserWarning): + """Celery has not been configured, as no config module has been found.""" + + +class AlwaysEagerIgnored(UserWarning): + """send_task ignores CELERY_ALWAYS_EAGER option""" + + +class InvalidTaskError(Exception): + """The task has invalid data or is not properly constructed.""" + + +class IncompleteStream(Exception): + """Found the end of a stream of data, but the data is not yet complete.""" + + +class ChordError(Exception): + """A task part of the chord raised an exception.""" + + +class CPendingDeprecationWarning(PendingDeprecationWarning): + pass + + +class CDeprecationWarning(DeprecationWarning): + pass + + +class FixupWarning(UserWarning): + pass + + +class DuplicateNodenameWarning(UserWarning): + """Multiple workers are using the same nodename.""" diff --git a/celery/five.py b/celery/five.py new file mode 100644 index 0000000..0d209d3 --- /dev/null +++ b/celery/five.py @@ -0,0 +1,393 @@ +# -*- coding: utf-8 -*- +""" + celery.five + ~~~~~~~~~~~ + + Compatibility implementations of features + only available in newer Python versions. 
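+
+    Illustrative example (a sketch)::
+
+        from celery.five import items, string_t
+
+        def stringify_keys(mapping):
+            return dict((str(k), v) for k, v in items(mapping))
+
+        assert isinstance('text', string_t)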
+ + +""" +from __future__ import absolute_import + +__all__ = ['Counter', 'reload', 'UserList', 'UserDict', 'Queue', 'Empty', + 'zip_longest', 'map', 'string', 'string_t', + 'long_t', 'text_t', 'range', 'int_types', 'items', 'keys', 'values', + 'nextfun', 'reraise', 'WhateverIO', 'with_metaclass', + 'OrderedDict', 'THREAD_TIMEOUT_MAX', 'format_d', + 'class_property', 'reclassmethod', 'create_module', + 'recreate_module', 'monotonic'] + +import io + +try: + from collections import Counter +except ImportError: # pragma: no cover + from collections import defaultdict + + def Counter(): # noqa + return defaultdict(int) + +# ############# py3k ######################################################### +import sys +PY3 = sys.version_info[0] == 3 + +try: + reload = reload # noqa +except NameError: # pragma: no cover + from imp import reload # noqa + +try: + from UserList import UserList # noqa +except ImportError: # pragma: no cover + from collections import UserList # noqa + +try: + from UserDict import UserDict # noqa +except ImportError: # pragma: no cover + from collections import UserDict # noqa + + +from kombu.five import monotonic + +if PY3: # pragma: no cover + import builtins + + from queue import Queue, Empty + from itertools import zip_longest + + map = map + string = str + string_t = str + long_t = int + text_t = str + range = range + int_types = (int, ) + _byte_t = bytes + + open_fqdn = 'builtins.open' + + def items(d): + return d.items() + + def keys(d): + return d.keys() + + def values(d): + return d.values() + + def nextfun(it): + return it.__next__ + + exec_ = getattr(builtins, 'exec') + + def reraise(tp, value, tb=None): + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + +else: + import __builtin__ as builtins # noqa + from Queue import Queue, Empty # noqa + from itertools import imap as map, izip_longest as zip_longest # noqa + string = unicode # noqa + string_t = basestring # noqa + text_t = unicode # noqa + long_t = long # noqa + range = xrange # noqa + int_types = (int, long) # noqa + _byte_t = (str, bytes) # noqa + + open_fqdn = '__builtin__.open' + + def items(d): # noqa + return d.iteritems() + + def keys(d): # noqa + return d.iterkeys() + + def values(d): # noqa + return d.itervalues() + + def nextfun(it): # noqa + return it.next + + def exec_(code, globs=None, locs=None): # pragma: no cover + """Execute code in a namespace.""" + if globs is None: + frame = sys._getframe(1) + globs = frame.f_globals + if locs is None: + locs = frame.f_locals + del frame + elif locs is None: + locs = globs + exec("""exec code in globs, locs""") + + exec_("""def reraise(tp, value, tb=None): raise tp, value, tb""") + + +def with_metaclass(Type, skip_attrs=set(['__dict__', '__weakref__'])): + """Class decorator to set metaclass. + + Works with both Python 2 and Python 3 and it does not add + an extra class in the lookup order like ``six.with_metaclass`` does + (that is -- it copies the original class instead of using inheritance). 
+ + """ + + def _clone_with_metaclass(Class): + attrs = dict((key, value) for key, value in items(vars(Class)) + if key not in skip_attrs) + return Type(Class.__name__, Class.__bases__, attrs) + + return _clone_with_metaclass + + +# ############# collections.OrderedDict ###################################### +# was moved to kombu +from kombu.utils.compat import OrderedDict # noqa + +# ############# threading.TIMEOUT_MAX ######################################## +try: + from threading import TIMEOUT_MAX as THREAD_TIMEOUT_MAX +except ImportError: + THREAD_TIMEOUT_MAX = 1e10 # noqa + +# ############# format(int, ',d') ############################################ + +if sys.version_info >= (2, 7): # pragma: no cover + def format_d(i): + return format(i, ',d') +else: # pragma: no cover + def format_d(i): # noqa + s = '%d' % i + groups = [] + while s and s[-1].isdigit(): + groups.append(s[-3:]) + s = s[:-3] + return s + ','.join(reversed(groups)) + + +# ############# Module Generation ############################################ + +# Utilities to dynamically +# recreate modules, either for lazy loading or +# to create old modules at runtime instead of +# having them litter the source tree. +import operator +import sys + +# import fails in python 2.5. fallback to reduce in stdlib +try: + from functools import reduce +except ImportError: + pass + +from importlib import import_module +from types import ModuleType + +MODULE_DEPRECATED = """ +The module %s is deprecated and will be removed in a future version. +""" + +DEFAULT_ATTRS = set(['__file__', '__path__', '__doc__', '__all__']) + +# im_func is no longer available in Py3. +# instead the unbound method itself can be used. +if sys.version_info[0] == 3: # pragma: no cover + def fun_of_method(method): + return method +else: + def fun_of_method(method): # noqa + return method.im_func + + +def getappattr(path): + """Gets attribute from the current_app recursively, + e.g. 
getappattr('amqp.get_task_consumer')``.""" + from celery import current_app + return current_app._rgetattr(path) + + +def _compat_task_decorator(*args, **kwargs): + from celery import current_app + kwargs.setdefault('accept_magic_kwargs', True) + return current_app.task(*args, **kwargs) + + +def _compat_periodic_task_decorator(*args, **kwargs): + from celery.task import periodic_task + kwargs.setdefault('accept_magic_kwargs', True) + return periodic_task(*args, **kwargs) + + +COMPAT_MODULES = { + 'celery': { + 'execute': { + 'send_task': 'send_task', + }, + 'decorators': { + 'task': _compat_task_decorator, + 'periodic_task': _compat_periodic_task_decorator, + }, + 'log': { + 'get_default_logger': 'log.get_default_logger', + 'setup_logger': 'log.setup_logger', + 'setup_loggig_subsystem': 'log.setup_logging_subsystem', + 'redirect_stdouts_to_logger': 'log.redirect_stdouts_to_logger', + }, + 'messaging': { + 'TaskPublisher': 'amqp.TaskPublisher', + 'TaskConsumer': 'amqp.TaskConsumer', + 'establish_connection': 'connection', + 'get_consumer_set': 'amqp.TaskConsumer', + }, + 'registry': { + 'tasks': 'tasks', + }, + }, + 'celery.task': { + 'control': { + 'broadcast': 'control.broadcast', + 'rate_limit': 'control.rate_limit', + 'time_limit': 'control.time_limit', + 'ping': 'control.ping', + 'revoke': 'control.revoke', + 'discard_all': 'control.purge', + 'inspect': 'control.inspect', + }, + 'schedules': 'celery.schedules', + 'chords': 'celery.canvas', + } +} + + +class class_property(object): + + def __init__(self, getter=None, setter=None): + if getter is not None and not isinstance(getter, classmethod): + getter = classmethod(getter) + if setter is not None and not isinstance(setter, classmethod): + setter = classmethod(setter) + self.__get = getter + self.__set = setter + + info = getter.__get__(object) # just need the info attrs. 
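+        # ``getter`` is a classmethod at this point, so __get__ hands back
+        # a bound method whose __doc__/__name__/__module__ mirror the
+        # original function; those attributes are copied below.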
+ self.__doc__ = info.__doc__ + self.__name__ = info.__name__ + self.__module__ = info.__module__ + + def __get__(self, obj, type=None): + if obj and type is None: + type = obj.__class__ + return self.__get.__get__(obj, type)() + + def __set__(self, obj, value): + if obj is None: + return self + return self.__set.__get__(obj)(value) + + def setter(self, setter): + return self.__class__(self.__get, setter) + + +def reclassmethod(method): + return classmethod(fun_of_method(method)) + + +class LazyModule(ModuleType): + _compat_modules = () + _all_by_module = {} + _direct = {} + _object_origins = {} + + def __getattr__(self, name): + if name in self._object_origins: + module = __import__(self._object_origins[name], None, None, [name]) + for item in self._all_by_module[module.__name__]: + setattr(self, item, getattr(module, item)) + return getattr(module, name) + elif name in self._direct: # pragma: no cover + module = __import__(self._direct[name], None, None, [name]) + setattr(self, name, module) + return module + return ModuleType.__getattribute__(self, name) + + def __dir__(self): + return list(set(self.__all__) | DEFAULT_ATTRS) + + def __reduce__(self): + return import_module, (self.__name__, ) + + +def create_module(name, attrs, cls_attrs=None, pkg=None, + base=LazyModule, prepare_attr=None): + fqdn = '.'.join([pkg.__name__, name]) if pkg else name + cls_attrs = {} if cls_attrs is None else cls_attrs + pkg, _, modname = name.rpartition('.') + cls_attrs['__module__'] = pkg + + attrs = dict((attr_name, prepare_attr(attr) if prepare_attr else attr) + for attr_name, attr in items(attrs)) + module = sys.modules[fqdn] = type(modname, (base, ), cls_attrs)(fqdn) + module.__dict__.update(attrs) + return module + + +def recreate_module(name, compat_modules=(), by_module={}, direct={}, + base=LazyModule, **attrs): + old_module = sys.modules[name] + origins = get_origins(by_module) + compat_modules = COMPAT_MODULES.get(name, ()) + + cattrs = dict( + _compat_modules=compat_modules, + _all_by_module=by_module, _direct=direct, + _object_origins=origins, + __all__=tuple(set(reduce( + operator.add, + [tuple(v) for v in [compat_modules, origins, direct, attrs]], + ))), + ) + new_module = create_module(name, attrs, cls_attrs=cattrs, base=base) + new_module.__dict__.update(dict((mod, get_compat_module(new_module, mod)) + for mod in compat_modules)) + return old_module, new_module + + +def get_compat_module(pkg, name): + from .local import Proxy + + def prepare(attr): + if isinstance(attr, string_t): + return Proxy(getappattr, (attr, )) + return attr + + attrs = COMPAT_MODULES[pkg.__name__][name] + if isinstance(attrs, string_t): + fqdn = '.'.join([pkg.__name__, name]) + module = sys.modules[fqdn] = import_module(attrs) + return module + attrs['__all__'] = list(attrs) + return create_module(name, dict(attrs), pkg=pkg, prepare_attr=prepare) + + +def get_origins(defs): + origins = {} + for module, attrs in items(defs): + origins.update(dict((attr, module) for attr in attrs)) + return origins + + +_SIO_write = io.StringIO.write +_SIO_init = io.StringIO.__init__ + + +class WhateverIO(io.StringIO): + + def __init__(self, v=None, *a, **kw): + _SIO_init(self, v.decode() if isinstance(v, _byte_t) else v, *a, **kw) + + def write(self, data): + _SIO_write(self, data.decode() if isinstance(data, _byte_t) else data) diff --git a/celery/fixups/__init__.py b/celery/fixups/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/fixups/django.py b/celery/fixups/django.py new file mode 100644 index 
0000000..f9ad331 --- /dev/null +++ b/celery/fixups/django.py @@ -0,0 +1,237 @@ +from __future__ import absolute_import + +import io +import os +import sys +import warnings + +from kombu.utils import cached_property, symbol_by_name + +from datetime import datetime +from importlib import import_module + +from celery import signals +from celery.exceptions import FixupWarning + +__all__ = ['DjangoFixup', 'fixup'] + +ERR_NOT_INSTALLED = """\ +Environment variable DJANGO_SETTINGS_MODULE is defined +but Django is not installed. Will not apply Django fixups! +""" + + +def _maybe_close_fd(fh): + try: + os.close(fh.fileno()) + except (AttributeError, OSError, TypeError): + # TypeError added for celery#962 + pass + + +def fixup(app, env='DJANGO_SETTINGS_MODULE'): + SETTINGS_MODULE = os.environ.get(env) + if SETTINGS_MODULE and 'django' not in app.loader_cls.lower(): + try: + import django # noqa + except ImportError: + warnings.warn(FixupWarning(ERR_NOT_INSTALLED)) + else: + return DjangoFixup(app).install() + + +class DjangoFixup(object): + + def __init__(self, app): + self.app = app + self.app.set_default() + + def install(self): + # Need to add project directory to path + sys.path.append(os.getcwd()) + + self.app.loader.now = self.now + self.app.loader.mail_admins = self.mail_admins + + signals.worker_init.connect(self.on_worker_init) + return self + + def on_worker_init(self, **kwargs): + # keep reference + self._worker_fixup = DjangoWorkerFixup(self.app).install() + + def now(self, utc=False): + return datetime.utcnow() if utc else self._now() + + def mail_admins(self, subject, body, fail_silently=False, **kwargs): + return self._mail_admins(subject, body, fail_silently=fail_silently) + + @cached_property + def _mail_admins(self): + return symbol_by_name('django.core.mail:mail_admins') + + @cached_property + def _now(self): + try: + return symbol_by_name('django.utils.timezone:now') + except (AttributeError, ImportError): # pre django-1.4 + return datetime.now + + +class DjangoWorkerFixup(object): + _db_recycles = 0 + + def __init__(self, app): + self.app = app + self.db_reuse_max = self.app.conf.get('CELERY_DB_REUSE_MAX', None) + self._db = import_module('django.db') + self._cache = import_module('django.core.cache') + self._settings = symbol_by_name('django.conf:settings') + + # Database-related exceptions. 
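+        # Collect the error classes of whichever database adapters happen
+        # to be installed; adapters that cannot be imported simply
+        # contribute an empty tuple to ``self.database_errors`` below.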
+ DatabaseError = symbol_by_name('django.db:DatabaseError') + try: + import MySQLdb as mysql + _my_database_errors = (mysql.DatabaseError, + mysql.InterfaceError, + mysql.OperationalError) + except ImportError: + _my_database_errors = () # noqa + try: + import psycopg2 as pg + _pg_database_errors = (pg.DatabaseError, + pg.InterfaceError, + pg.OperationalError) + except ImportError: + _pg_database_errors = () # noqa + try: + import sqlite3 + _lite_database_errors = (sqlite3.DatabaseError, + sqlite3.InterfaceError, + sqlite3.OperationalError) + except ImportError: + _lite_database_errors = () # noqa + try: + import cx_Oracle as oracle + _oracle_database_errors = (oracle.DatabaseError, + oracle.InterfaceError, + oracle.OperationalError) + except ImportError: + _oracle_database_errors = () # noqa + + try: + self._close_old_connections = symbol_by_name( + 'django.db:close_old_connections', + ) + except (ImportError, AttributeError): + self._close_old_connections = None + self.database_errors = ( + (DatabaseError, ) + + _my_database_errors + + _pg_database_errors + + _lite_database_errors + + _oracle_database_errors + ) + + def validate_models(self): + import django + try: + django.setup() + except AttributeError: + pass + s = io.StringIO() + try: + from django.core.management.validation import get_validation_errors + except ImportError: + from django.core.management.base import BaseCommand + cmd = BaseCommand() + cmd.stdout, cmd.stderr = sys.stdout, sys.stderr + cmd.check() + else: + num_errors = get_validation_errors(s, None) + if num_errors: + raise RuntimeError( + 'One or more Django models did not validate:\n{0}'.format( + s.getvalue())) + + def install(self): + signals.beat_embedded_init.connect(self.close_database) + signals.worker_ready.connect(self.on_worker_ready) + signals.task_prerun.connect(self.on_task_prerun) + signals.task_postrun.connect(self.on_task_postrun) + signals.worker_process_init.connect(self.on_worker_process_init) + self.validate_models() + self.close_database() + self.close_cache() + return self + + def on_worker_process_init(self, **kwargs): + # the parent process may have established these, + # so need to close them. + + # calling db.close() on some DB connections will cause + # the inherited DB conn to also get broken in the parent + # process so we need to remove it without triggering any + # network IO that close() might cause. 
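+        # Closing only the raw file descriptor (_maybe_close_fd above)
+        # detaches this child from the inherited socket without sending
+        # anything to the database server.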
+ try: + for c in self._db.connections.all(): + if c and c.connection: + _maybe_close_fd(c.connection) + except AttributeError: + if self._db.connection and self._db.connection.connection: + _maybe_close_fd(self._db.connection.connection) + + # use the _ version to avoid DB_REUSE preventing the conn.close() call + self._close_database() + self.close_cache() + + def on_task_prerun(self, sender, **kwargs): + """Called before every task.""" + if not getattr(sender.request, 'is_eager', False): + self.close_database() + + def on_task_postrun(self, sender, **kwargs): + # See http://groups.google.com/group/django-users/ + # browse_thread/thread/78200863d0c07c6d/ + if not getattr(sender.request, 'is_eager', False): + self.close_database() + self.close_cache() + + def close_database(self, **kwargs): + if self._close_old_connections: + return self._close_old_connections() # Django 1.6 + if not self.db_reuse_max: + return self._close_database() + if self._db_recycles >= self.db_reuse_max * 2: + self._db_recycles = 0 + self._close_database() + self._db_recycles += 1 + + def _close_database(self): + try: + funs = [conn.close for conn in self._db.connections] + except AttributeError: + if hasattr(self._db, 'close_old_connections'): # django 1.6 + funs = [self._db.close_old_connections] + else: + # pre multidb, pending deprication in django 1.6 + funs = [self._db.close_connection] + + for close in funs: + try: + close() + except self.database_errors as exc: + str_exc = str(exc) + if 'closed' not in str_exc and 'not connected' not in str_exc: + raise + + def close_cache(self): + try: + self._cache.cache.close() + except (TypeError, AttributeError): + pass + + def on_worker_ready(self, **kwargs): + if self._settings.DEBUG: + warnings.warn('Using settings.DEBUG leads to a memory leak, never ' + 'use this setting in production environments!') diff --git a/celery/loaders/__init__.py b/celery/loaders/__init__.py new file mode 100644 index 0000000..2a39ba2 --- /dev/null +++ b/celery/loaders/__init__.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +""" + celery.loaders + ~~~~~~~~~~~~~~ + + Loaders define how configuration is read, what happens + when workers start, when tasks are executed and so on. + +""" +from __future__ import absolute_import + +from celery._state import current_app +from celery.utils import deprecated +from celery.utils.imports import symbol_by_name, import_from_cwd + +__all__ = ['get_loader_cls'] + +LOADER_ALIASES = {'app': 'celery.loaders.app:AppLoader', + 'default': 'celery.loaders.default:Loader', + 'django': 'djcelery.loaders:DjangoLoader'} + + +def get_loader_cls(loader): + """Get loader class by name/alias""" + return symbol_by_name(loader, LOADER_ALIASES, imp=import_from_cwd) + + +@deprecated(deprecation=2.5, removal=4.0, + alternative='celery.current_app.loader') +def current_loader(): + return current_app.loader + + +@deprecated(deprecation=2.5, removal=4.0, + alternative='celery.current_app.conf') +def load_settings(): + return current_app.conf diff --git a/celery/loaders/app.py b/celery/loaders/app.py new file mode 100644 index 0000000..87f034b --- /dev/null +++ b/celery/loaders/app.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +""" + celery.loaders.app + ~~~~~~~~~~~~~~~~~~ + + The default loader used with custom app instances. 
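+
+    It adds no behaviour of its own and simply inherits everything from
+    :class:`~celery.loaders.base.BaseLoader`.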
+ +""" +from __future__ import absolute_import + +from .base import BaseLoader + +__all__ = ['AppLoader'] + + +class AppLoader(BaseLoader): + pass diff --git a/celery/loaders/base.py b/celery/loaders/base.py new file mode 100644 index 0000000..e58a3d6 --- /dev/null +++ b/celery/loaders/base.py @@ -0,0 +1,291 @@ +# -*- coding: utf-8 -*- +""" + celery.loaders.base + ~~~~~~~~~~~~~~~~~~~ + + Loader base class. + +""" +from __future__ import absolute_import + +import anyjson +import imp as _imp +import importlib +import os +import re +import sys + +from datetime import datetime + +from kombu.utils import cached_property +from kombu.utils.encoding import safe_str + +from celery import signals +from celery.datastructures import DictAttribute, force_mapping +from celery.five import reraise, string_t +from celery.utils.functional import maybe_list +from celery.utils.imports import ( + import_from_cwd, symbol_by_name, NotAPackage, find_module, +) + +__all__ = ['BaseLoader'] + +_RACE_PROTECTION = False +CONFIG_INVALID_NAME = """\ +Error: Module '{module}' doesn't exist, or it's not a valid \ +Python module name. +""" + +CONFIG_WITH_SUFFIX = CONFIG_INVALID_NAME + """\ +Did you mean '{suggest}'? +""" + + +class BaseLoader(object): + """The base class for loaders. + + Loaders handles, + + * Reading celery client/worker configurations. + + * What happens when a task starts? + See :meth:`on_task_init`. + + * What happens when the worker starts? + See :meth:`on_worker_init`. + + * What happens when the worker shuts down? + See :meth:`on_worker_shutdown`. + + * What modules are imported to find tasks? + + """ + builtin_modules = frozenset() + configured = False + override_backends = {} + worker_initialized = False + + _conf = None + + def __init__(self, app, **kwargs): + self.app = app + self.task_modules = set() + + def now(self, utc=True): + if utc: + return datetime.utcnow() + return datetime.now() + + def on_task_init(self, task_id, task): + """This method is called before a task is executed.""" + pass + + def on_process_cleanup(self): + """This method is called after a task is executed.""" + pass + + def on_worker_init(self): + """This method is called when the worker (:program:`celery worker`) + starts.""" + pass + + def on_worker_shutdown(self): + """This method is called when the worker (:program:`celery worker`) + shuts down.""" + pass + + def on_worker_process_init(self): + """This method is called when a child process starts.""" + pass + + def import_task_module(self, module): + self.task_modules.add(module) + return self.import_from_cwd(module) + + def import_module(self, module, package=None): + return importlib.import_module(module, package=package) + + def import_from_cwd(self, module, imp=None, package=None): + return import_from_cwd( + module, + self.import_module if imp is None else imp, + package=package, + ) + + def import_default_modules(self): + signals.import_modules.send(sender=self.app) + return [ + self.import_task_module(m) for m in ( + tuple(self.builtin_modules) + + tuple(maybe_list(self.app.conf.CELERY_IMPORTS)) + + tuple(maybe_list(self.app.conf.CELERY_INCLUDE)) + ) + ] + + def init_worker(self): + if not self.worker_initialized: + self.worker_initialized = True + self.import_default_modules() + self.on_worker_init() + + def shutdown_worker(self): + self.on_worker_shutdown() + + def init_worker_process(self): + self.on_worker_process_init() + + def config_from_object(self, obj, silent=False): + if isinstance(obj, string_t): + try: + obj = self._smart_import(obj, 
imp=self.import_from_cwd) + except (ImportError, AttributeError): + if silent: + return False + raise + self._conf = force_mapping(obj) + return True + + def _smart_import(self, path, imp=None): + imp = self.import_module if imp is None else imp + if ':' in path: + # Path includes attribute so can just jump here. + # e.g. ``os.path:abspath``. + return symbol_by_name(path, imp=imp) + + # Not sure if path is just a module name or if it includes an + # attribute name (e.g. ``os.path``, vs, ``os.path.abspath`` + try: + return imp(path) + except ImportError: + # Not a module name, so try module + attribute. + return symbol_by_name(path, imp=imp) + + def _import_config_module(self, name): + try: + self.find_module(name) + except NotAPackage: + if name.endswith('.py'): + reraise(NotAPackage, NotAPackage(CONFIG_WITH_SUFFIX.format( + module=name, suggest=name[:-3])), sys.exc_info()[2]) + reraise(NotAPackage, NotAPackage(CONFIG_INVALID_NAME.format( + module=name)), sys.exc_info()[2]) + else: + return self.import_from_cwd(name) + + def find_module(self, module): + return find_module(module) + + def cmdline_config_parser( + self, args, namespace='celery', + re_type=re.compile(r'\((\w+)\)'), + extra_types={'json': anyjson.loads}, + override_types={'tuple': 'json', + 'list': 'json', + 'dict': 'json'}): + from celery.app.defaults import Option, NAMESPACES + namespace = namespace.upper() + typemap = dict(Option.typemap, **extra_types) + + def getarg(arg): + """Parse a single configuration definition from + the command-line.""" + + # ## find key/value + # ns.key=value|ns_key=value (case insensitive) + key, value = arg.split('=', 1) + key = key.upper().replace('.', '_') + + # ## find namespace. + # .key=value|_key=value expands to default namespace. + if key[0] == '_': + ns, key = namespace, key[1:] + else: + # find namespace part of key + ns, key = key.split('_', 1) + + ns_key = (ns and ns + '_' or '') + key + + # (type)value makes cast to custom type. + cast = re_type.match(value) + if cast: + type_ = cast.groups()[0] + type_ = override_types.get(type_, type_) + value = value[len(cast.group()):] + value = typemap[type_](value) + else: + try: + value = NAMESPACES[ns][key].to_python(value) + except ValueError as exc: + # display key name in error message. 
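+                    # (the result reads e.g. "'CELERYD_CONCURRENCY':
+                    #  invalid literal for int() ..."; the setting name
+                    #  shown here is only illustrative.)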
+ raise ValueError('{0!r}: {1}'.format(ns_key, exc)) + return ns_key, value + return dict(getarg(arg) for arg in args) + + def mail_admins(self, subject, body, fail_silently=False, + sender=None, to=None, host=None, port=None, + user=None, password=None, timeout=None, + use_ssl=False, use_tls=False): + message = self.mail.Message(sender=sender, to=to, + subject=safe_str(subject), + body=safe_str(body)) + mailer = self.mail.Mailer(host=host, port=port, + user=user, password=password, + timeout=timeout, use_ssl=use_ssl, + use_tls=use_tls) + mailer.send(message, fail_silently=fail_silently) + + def read_configuration(self, env='CELERY_CONFIG_MODULE'): + try: + custom_config = os.environ[env] + except KeyError: + pass + else: + if custom_config: + usercfg = self._import_config_module(custom_config) + return DictAttribute(usercfg) + return {} + + def autodiscover_tasks(self, packages, related_name='tasks'): + self.task_modules.update( + mod.__name__ for mod in autodiscover_tasks(packages or (), + related_name) if mod) + + @property + def conf(self): + """Loader configuration.""" + if self._conf is None: + self._conf = self.read_configuration() + return self._conf + + @cached_property + def mail(self): + return self.import_module('celery.utils.mail') + + +def autodiscover_tasks(packages, related_name='tasks'): + global _RACE_PROTECTION + + if _RACE_PROTECTION: + return () + _RACE_PROTECTION = True + try: + return [find_related_module(pkg, related_name) for pkg in packages] + finally: + _RACE_PROTECTION = False + + +def find_related_module(package, related_name): + """Given a package name and a module name, tries to find that + module.""" + + try: + pkg_path = importlib.import_module(package).__path__ + except AttributeError: + return + + try: + _imp.find_module(related_name, pkg_path) + except ImportError: + return + + return importlib.import_module('{0}.{1}'.format(package, related_name)) diff --git a/celery/loaders/default.py b/celery/loaders/default.py new file mode 100644 index 0000000..6071480 --- /dev/null +++ b/celery/loaders/default.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +""" + celery.loaders.default + ~~~~~~~~~~~~~~~~~~~~~~ + + The default loader used when no custom app has been initialized. + +""" +from __future__ import absolute_import + +import os +import warnings + +from celery.datastructures import DictAttribute +from celery.exceptions import NotConfigured +from celery.utils import strtobool + +from .base import BaseLoader + +__all__ = ['Loader', 'DEFAULT_CONFIG_MODULE'] + +DEFAULT_CONFIG_MODULE = 'celeryconfig' + +#: Warns if configuration file is missing if :envvar:`C_WNOCONF` is set. +C_WNOCONF = strtobool(os.environ.get('C_WNOCONF', False)) + + +class Loader(BaseLoader): + """The loader used by the default app.""" + + def setup_settings(self, settingsdict): + return DictAttribute(settingsdict) + + def read_configuration(self, fail_silently=True): + """Read configuration from :file:`celeryconfig.py` and configure + celery and Django so it can be used by regular Python.""" + configname = os.environ.get('CELERY_CONFIG_MODULE', + DEFAULT_CONFIG_MODULE) + try: + usercfg = self._import_config_module(configname) + except ImportError: + if not fail_silently: + raise + # billiard sets this if forked using execv + if C_WNOCONF and not os.environ.get('FORKED_BY_MULTIPROCESSING'): + warnings.warn(NotConfigured( + 'No {module} module found! 
Please make sure it exists and ' + 'is available to Python.'.format(module=configname))) + return self.setup_settings({}) + else: + self.configured = True + return self.setup_settings(usercfg) diff --git a/celery/local.py b/celery/local.py new file mode 100644 index 0000000..1a10c2d --- /dev/null +++ b/celery/local.py @@ -0,0 +1,283 @@ +# -*- coding: utf-8 -*- +""" + celery.local + ~~~~~~~~~~~~ + + This module contains critical utilities that + needs to be loaded as soon as possible, and that + shall not load any third party modules. + + Parts of this module is Copyright by Werkzeug Team. + +""" +from __future__ import absolute_import + +import importlib +import sys + +from .five import string + +__all__ = ['Proxy', 'PromiseProxy', 'try_import', 'maybe_evaluate'] + +__module__ = __name__ # used by Proxy class body + +PY3 = sys.version_info[0] == 3 + + +def _default_cls_attr(name, type_, cls_value): + # Proxy uses properties to forward the standard + # class attributes __module__, __name__ and __doc__ to the real + # object, but these needs to be a string when accessed from + # the Proxy class directly. This is a hack to make that work. + # -- See Issue #1087. + + def __new__(cls, getter): + instance = type_.__new__(cls, cls_value) + instance.__getter = getter + return instance + + def __get__(self, obj, cls=None): + return self.__getter(obj) if obj is not None else self + + return type(name, (type_, ), { + '__new__': __new__, '__get__': __get__, + }) + + +def try_import(module, default=None): + """Try to import and return module, or return + None if the module does not exist.""" + try: + return importlib.import_module(module) + except ImportError: + return default + + +class Proxy(object): + """Proxy to another object.""" + + # Code stolen from werkzeug.local.Proxy. + __slots__ = ('__local', '__args', '__kwargs', '__dict__') + + def __init__(self, local, + args=None, kwargs=None, name=None, __doc__=None): + object.__setattr__(self, '_Proxy__local', local) + object.__setattr__(self, '_Proxy__args', args or ()) + object.__setattr__(self, '_Proxy__kwargs', kwargs or {}) + if name is not None: + object.__setattr__(self, '__custom_name__', name) + if __doc__ is not None: + object.__setattr__(self, '__doc__', __doc__) + + @_default_cls_attr('name', str, __name__) + def __name__(self): + try: + return self.__custom_name__ + except AttributeError: + return self._get_current_object().__name__ + + @_default_cls_attr('module', str, __module__) + def __module__(self): + return self._get_current_object().__module__ + + @_default_cls_attr('doc', str, __doc__) + def __doc__(self): + return self._get_current_object().__doc__ + + def _get_class(self): + return self._get_current_object().__class__ + + @property + def __class__(self): + return self._get_class() + + def _get_current_object(self): + """Return the current object. This is useful if you want the real + object behind the proxy at a time for performance reasons or because + you want to pass the object into a different context. 
+ """ + loc = object.__getattribute__(self, '_Proxy__local') + if not hasattr(loc, '__release_local__'): + return loc(*self.__args, **self.__kwargs) + try: + return getattr(loc, self.__name__) + except AttributeError: + raise RuntimeError('no object bound to {0.__name__}'.format(self)) + + @property + def __dict__(self): + try: + return self._get_current_object().__dict__ + except RuntimeError: # pragma: no cover + raise AttributeError('__dict__') + + def __repr__(self): + try: + obj = self._get_current_object() + except RuntimeError: # pragma: no cover + return '<{0} unbound>'.format(self.__class__.__name__) + return repr(obj) + + def __bool__(self): + try: + return bool(self._get_current_object()) + except RuntimeError: # pragma: no cover + return False + __nonzero__ = __bool__ # Py2 + + def __unicode__(self): + try: + return string(self._get_current_object()) + except RuntimeError: # pragma: no cover + return repr(self) + + def __dir__(self): + try: + return dir(self._get_current_object()) + except RuntimeError: # pragma: no cover + return [] + + def __getattr__(self, name): + if name == '__members__': + return dir(self._get_current_object()) + return getattr(self._get_current_object(), name) + + def __setitem__(self, key, value): + self._get_current_object()[key] = value + + def __delitem__(self, key): + del self._get_current_object()[key] + + def __setslice__(self, i, j, seq): + self._get_current_object()[i:j] = seq + + def __delslice__(self, i, j): + del self._get_current_object()[i:j] + + __setattr__ = lambda x, n, v: setattr(x._get_current_object(), n, v) + __delattr__ = lambda x, n: delattr(x._get_current_object(), n) + __str__ = lambda x: str(x._get_current_object()) + __lt__ = lambda x, o: x._get_current_object() < o + __le__ = lambda x, o: x._get_current_object() <= o + __eq__ = lambda x, o: x._get_current_object() == o + __ne__ = lambda x, o: x._get_current_object() != o + __gt__ = lambda x, o: x._get_current_object() > o + __ge__ = lambda x, o: x._get_current_object() >= o + __hash__ = lambda x: hash(x._get_current_object()) + __call__ = lambda x, *a, **kw: x._get_current_object()(*a, **kw) + __len__ = lambda x: len(x._get_current_object()) + __getitem__ = lambda x, i: x._get_current_object()[i] + __iter__ = lambda x: iter(x._get_current_object()) + __contains__ = lambda x, i: i in x._get_current_object() + __getslice__ = lambda x, i, j: x._get_current_object()[i:j] + __add__ = lambda x, o: x._get_current_object() + o + __sub__ = lambda x, o: x._get_current_object() - o + __mul__ = lambda x, o: x._get_current_object() * o + __floordiv__ = lambda x, o: x._get_current_object() // o + __mod__ = lambda x, o: x._get_current_object() % o + __divmod__ = lambda x, o: x._get_current_object().__divmod__(o) + __pow__ = lambda x, o: x._get_current_object() ** o + __lshift__ = lambda x, o: x._get_current_object() << o + __rshift__ = lambda x, o: x._get_current_object() >> o + __and__ = lambda x, o: x._get_current_object() & o + __xor__ = lambda x, o: x._get_current_object() ^ o + __or__ = lambda x, o: x._get_current_object() | o + __div__ = lambda x, o: x._get_current_object().__div__(o) + __truediv__ = lambda x, o: x._get_current_object().__truediv__(o) + __neg__ = lambda x: -(x._get_current_object()) + __pos__ = lambda x: +(x._get_current_object()) + __abs__ = lambda x: abs(x._get_current_object()) + __invert__ = lambda x: ~(x._get_current_object()) + __complex__ = lambda x: complex(x._get_current_object()) + __int__ = lambda x: int(x._get_current_object()) + __float__ = lambda x: 
float(x._get_current_object()) + __oct__ = lambda x: oct(x._get_current_object()) + __hex__ = lambda x: hex(x._get_current_object()) + __index__ = lambda x: x._get_current_object().__index__() + __coerce__ = lambda x, o: x._get_current_object().__coerce__(o) + __enter__ = lambda x: x._get_current_object().__enter__() + __exit__ = lambda x, *a, **kw: x._get_current_object().__exit__(*a, **kw) + __reduce__ = lambda x: x._get_current_object().__reduce__() + + if not PY3: + __cmp__ = lambda x, o: cmp(x._get_current_object(), o) # noqa + __long__ = lambda x: long(x._get_current_object()) # noqa + + +class PromiseProxy(Proxy): + """This is a proxy to an object that has not yet been evaulated. + + :class:`Proxy` will evaluate the object each time, while the + promise will only evaluate it once. + + """ + + __slots__ = ('__pending__', ) + + def _get_current_object(self): + try: + return object.__getattribute__(self, '__thing') + except AttributeError: + return self.__evaluate__() + + def __then__(self, fun, *args, **kwargs): + if self.__evaluated__(): + return fun(*args, **kwargs) + from collections import deque + try: + pending = object.__getattribute__(self, '__pending__') + except AttributeError: + pending = None + if pending is None: + pending = deque() + object.__setattr__(self, '__pending__', pending) + pending.append((fun, args, kwargs)) + + def __evaluated__(self): + try: + object.__getattribute__(self, '__thing') + except AttributeError: + return False + return True + + def __maybe_evaluate__(self): + return self._get_current_object() + + def __evaluate__(self, + _clean=('_Proxy__local', + '_Proxy__args', + '_Proxy__kwargs')): + try: + thing = Proxy._get_current_object(self) + except: + raise + else: + object.__setattr__(self, '__thing', thing) + for attr in _clean: + try: + object.__delattr__(self, attr) + except AttributeError: # pragma: no cover + # May mask errors so ignore + pass + try: + pending = object.__getattribute__(self, '__pending__') + except AttributeError: + pass + else: + try: + while pending: + fun, args, kwargs = pending.popleft() + fun(*args, **kwargs) + finally: + try: + object.__delattr__(self, '__pending__') + except AttributeError: + pass + return thing + + +def maybe_evaluate(obj): + try: + return obj.__maybe_evaluate__() + except AttributeError: + return obj diff --git a/celery/platforms.py b/celery/platforms.py new file mode 100644 index 0000000..03914f1 --- /dev/null +++ b/celery/platforms.py @@ -0,0 +1,766 @@ +# -*- coding: utf-8 -*- +""" + celery.platforms + ~~~~~~~~~~~~~~~~ + + Utilities dealing with platform specifics: signals, daemonization, + users, groups, and so on. 
+ +""" +from __future__ import absolute_import, print_function + +import atexit +import errno +import math +import numbers +import os +import platform as _platform +import signal as _signal +import sys +import warnings + +from collections import namedtuple + +from billiard import current_process +# fileno used to be in this module +from kombu.utils import maybe_fileno +from kombu.utils.compat import get_errno +from kombu.utils.encoding import safe_str +from contextlib import contextmanager + +from .local import try_import +from .five import items, range, reraise, string_t, zip_longest +from .utils.functional import uniq + +_setproctitle = try_import('setproctitle') +resource = try_import('resource') +pwd = try_import('pwd') +grp = try_import('grp') + +__all__ = ['EX_OK', 'EX_FAILURE', 'EX_UNAVAILABLE', 'EX_USAGE', 'SYSTEM', + 'IS_OSX', 'IS_WINDOWS', 'pyimplementation', 'LockFailed', + 'get_fdmax', 'Pidfile', 'create_pidlock', + 'close_open_fds', 'DaemonContext', 'detached', 'parse_uid', + 'parse_gid', 'setgroups', 'initgroups', 'setgid', 'setuid', + 'maybe_drop_privileges', 'signals', 'set_process_title', + 'set_mp_process_title', 'get_errno_name', 'ignore_errno'] + +# exitcodes +EX_OK = getattr(os, 'EX_OK', 0) +EX_FAILURE = 1 +EX_UNAVAILABLE = getattr(os, 'EX_UNAVAILABLE', 69) +EX_USAGE = getattr(os, 'EX_USAGE', 64) +EX_CANTCREAT = getattr(os, 'EX_CANTCREAT', 73) + +SYSTEM = _platform.system() +IS_OSX = SYSTEM == 'Darwin' +IS_WINDOWS = SYSTEM == 'Windows' + +DAEMON_WORKDIR = '/' + +PIDFILE_FLAGS = os.O_CREAT | os.O_EXCL | os.O_WRONLY +PIDFILE_MODE = ((os.R_OK | os.W_OK) << 6) | ((os.R_OK) << 3) | ((os.R_OK)) + +PIDLOCKED = """ERROR: Pidfile ({0}) already exists. +Seems we're already running? (pid: {1})""" + +_range = namedtuple('_range', ('start', 'stop')) + +C_FORCE_ROOT = os.environ.get('C_FORCE_ROOT', False) + +ROOT_DISALLOWED = """\ +Running a worker with superuser privileges when the +worker accepts messages serialized with pickle is a very bad idea! + +If you really want to continue then you have to set the C_FORCE_ROOT +environment variable (but please think about this before you do). + +User information: uid={uid} euid={euid} gid={gid} egid={egid} +""" + +ROOT_DISCOURAGED = """\ +You are running the worker with superuser privileges, which is +absolutely not recommended! + +Please specify a different user using the -u option. + +User information: uid={uid} euid={euid} gid={gid} egid={egid} +""" + + +def pyimplementation(): + """Return string identifying the current Python implementation.""" + if hasattr(_platform, 'python_implementation'): + return _platform.python_implementation() + elif sys.platform.startswith('java'): + return 'Jython ' + sys.platform + elif hasattr(sys, 'pypy_version_info'): + v = '.'.join(str(p) for p in sys.pypy_version_info[:3]) + if sys.pypy_version_info[3:]: + v += '-' + ''.join(str(p) for p in sys.pypy_version_info[3:]) + return 'PyPy ' + v + else: + return 'CPython' + + +class LockFailed(Exception): + """Raised if a pidlock can't be acquired.""" + + +def get_fdmax(default=None): + """Return the maximum number of open file descriptors + on this system. + + :keyword default: Value returned if there's no file + descriptor limit. 
+ + """ + try: + return os.sysconf('SC_OPEN_MAX') + except: + pass + if resource is None: # Windows + return default + fdmax = resource.getrlimit(resource.RLIMIT_NOFILE)[1] + if fdmax == resource.RLIM_INFINITY: + return default + return fdmax + + +class Pidfile(object): + """Pidfile + + This is the type returned by :func:`create_pidlock`. + + TIP: Use the :func:`create_pidlock` function instead, + which is more convenient and also removes stale pidfiles (when + the process holding the lock is no longer running). + + """ + + #: Path to the pid lock file. + path = None + + def __init__(self, path): + self.path = os.path.abspath(path) + + def acquire(self): + """Acquire lock.""" + try: + self.write_pid() + except OSError as exc: + reraise(LockFailed, LockFailed(str(exc)), sys.exc_info()[2]) + return self + __enter__ = acquire + + def is_locked(self): + """Return true if the pid lock exists.""" + return os.path.exists(self.path) + + def release(self, *args): + """Release lock.""" + self.remove() + __exit__ = release + + def read_pid(self): + """Read and return the current pid.""" + with ignore_errno('ENOENT'): + with open(self.path, 'r') as fh: + line = fh.readline() + if line.strip() == line: # must contain '\n' + raise ValueError( + 'Partial or invalid pidfile {0.path}'.format(self)) + + try: + return int(line.strip()) + except ValueError: + raise ValueError( + 'pidfile {0.path} contents invalid.'.format(self)) + + def remove(self): + """Remove the lock.""" + with ignore_errno(errno.ENOENT, errno.EACCES): + os.unlink(self.path) + + def remove_if_stale(self): + """Remove the lock if the process is not running. + (does not respond to signals).""" + try: + pid = self.read_pid() + except ValueError as exc: + print('Broken pidfile found. Removing it.', file=sys.stderr) + self.remove() + return True + if not pid: + self.remove() + return True + + try: + os.kill(pid, 0) + except os.error as exc: + if exc.errno == errno.ESRCH: + print('Stale pidfile exists. Removing it.', file=sys.stderr) + self.remove() + return True + return False + + def write_pid(self): + pid = os.getpid() + content = '{0}\n'.format(pid) + + pidfile_fd = os.open(self.path, PIDFILE_FLAGS, PIDFILE_MODE) + pidfile = os.fdopen(pidfile_fd, 'w') + try: + pidfile.write(content) + # flush and sync so that the re-read below works. + pidfile.flush() + try: + os.fsync(pidfile_fd) + except AttributeError: # pragma: no cover + pass + finally: + pidfile.close() + + rfh = open(self.path) + try: + if rfh.read() != content: + raise LockFailed( + "Inconsistency: Pidfile content doesn't match at re-read") + finally: + rfh.close() +PIDFile = Pidfile # compat alias + + +def create_pidlock(pidfile): + """Create and verify pidfile. + + If the pidfile already exists the program exits with an error message, + however if the process it refers to is not running anymore, the pidfile + is deleted and the program continues. + + This function will automatically install an :mod:`atexit` handler + to release the lock at exit, you can skip this by calling + :func:`_create_pidlock` instead. + + :returns: :class:`Pidfile`. + + **Example**: + + .. 
code-block:: python + + pidlock = create_pidlock('/var/run/app.pid') + + """ + pidlock = _create_pidlock(pidfile) + atexit.register(pidlock.release) + return pidlock + + +def _create_pidlock(pidfile): + pidlock = Pidfile(pidfile) + if pidlock.is_locked() and not pidlock.remove_if_stale(): + print(PIDLOCKED.format(pidfile, pidlock.read_pid()), file=sys.stderr) + raise SystemExit(EX_CANTCREAT) + pidlock.acquire() + return pidlock + + +if hasattr(os, 'closerange'): + + def close_open_fds(keep=None): + # must make sure this is 0-inclusive (Issue #1882) + keep = list(uniq(sorted( + f for f in map(maybe_fileno, keep or []) if f is not None + ))) + maxfd = get_fdmax(default=2048) + kL, kH = iter([-1] + keep), iter(keep + [maxfd]) + for low, high in zip_longest(kL, kH): + if low + 1 != high: + os.closerange(low + 1, high) + +else: + + def close_open_fds(keep=None): # noqa + keep = [maybe_fileno(f) + for f in (keep or []) if maybe_fileno(f) is not None] + for fd in reversed(range(get_fdmax(default=2048))): + if fd not in keep: + with ignore_errno(errno.EBADF): + os.close(fd) + + +class DaemonContext(object): + _is_open = False + + def __init__(self, pidfile=None, workdir=None, umask=None, + fake=False, after_chdir=None, **kwargs): + if isinstance(umask, string_t): + umask = int(umask, 8) # convert str -> octal + self.workdir = workdir or DAEMON_WORKDIR + self.umask = umask + self.fake = fake + self.after_chdir = after_chdir + self.stdfds = (sys.stdin, sys.stdout, sys.stderr) + + def redirect_to_null(self, fd): + if fd is not None: + dest = os.open(os.devnull, os.O_RDWR) + os.dup2(dest, fd) + + def open(self): + if not self._is_open: + if not self.fake: + self._detach() + + os.chdir(self.workdir) + if self.umask is not None: + os.umask(self.umask) + + if self.after_chdir: + self.after_chdir() + + if not self.fake: + close_open_fds(self.stdfds) + for fd in self.stdfds: + self.redirect_to_null(maybe_fileno(fd)) + + self._is_open = True + __enter__ = open + + def close(self, *args): + if self._is_open: + self._is_open = False + __exit__ = close + + def _detach(self): + if os.fork() == 0: # first child + os.setsid() # create new session + if os.fork() > 0: # second child + os._exit(0) + else: + os._exit(0) + return self + + +def detached(logfile=None, pidfile=None, uid=None, gid=None, umask=0, + workdir=None, fake=False, **opts): + """Detach the current process in the background (daemonize). + + :keyword logfile: Optional log file. The ability to write to this file + will be verified before the process is detached. + :keyword pidfile: Optional pidfile. The pidfile will not be created, + as this is the responsibility of the child. But the process will + exit if the pid lock exists and the pid written is still running. + :keyword uid: Optional user id or user name to change + effective privileges to. + :keyword gid: Optional group id or group name to change effective + privileges to. + :keyword umask: Optional umask that will be effective in the child process. + :keyword workdir: Optional new working directory. + :keyword fake: Don't actually detach, intented for debugging purposes. + :keyword \*\*opts: Ignored. + + **Example**: + + .. code-block:: python + + from celery.platforms import detached, create_pidlock + + with detached(logfile='/var/log/app.log', pidfile='/var/run/app.pid', + uid='nobody'): + # Now in detached child process with effective user set to nobody, + # and we know that our logfile can be written to, and that + # the pidfile is not locked. 
+ pidlock = create_pidlock('/var/run/app.pid') + + # Run the program + program.run(logfile='/var/log/app.log') + + """ + + if not resource: + raise RuntimeError('This platform does not support detach.') + workdir = os.getcwd() if workdir is None else workdir + + signals.reset('SIGCLD') # Make sure SIGCLD is using the default handler. + maybe_drop_privileges(uid=uid, gid=gid) + + def after_chdir_do(): + # Since without stderr any errors will be silently suppressed, + # we need to know that we have access to the logfile. + logfile and open(logfile, 'a').close() + # Doesn't actually create the pidfile, but makes sure it's not stale. + if pidfile: + _create_pidlock(pidfile).release() + + return DaemonContext( + umask=umask, workdir=workdir, fake=fake, after_chdir=after_chdir_do, + ) + + +def parse_uid(uid): + """Parse user id. + + uid can be an integer (uid) or a string (user name), if a user name + the uid is taken from the system user registry. + + """ + try: + return int(uid) + except ValueError: + try: + return pwd.getpwnam(uid).pw_uid + except (AttributeError, KeyError): + raise KeyError('User does not exist: {0}'.format(uid)) + + +def parse_gid(gid): + """Parse group id. + + gid can be an integer (gid) or a string (group name), if a group name + the gid is taken from the system group registry. + + """ + try: + return int(gid) + except ValueError: + try: + return grp.getgrnam(gid).gr_gid + except (AttributeError, KeyError): + raise KeyError('Group does not exist: {0}'.format(gid)) + + +def _setgroups_hack(groups): + """:fun:`setgroups` may have a platform-dependent limit, + and it is not always possible to know in advance what this limit + is, so we use this ugly hack stolen from glibc.""" + groups = groups[:] + + while 1: + try: + return os.setgroups(groups) + except ValueError: # error from Python's check. + if len(groups) <= 1: + raise + groups[:] = groups[:-1] + except OSError as exc: # error from the OS. + if exc.errno != errno.EINVAL or len(groups) <= 1: + raise + groups[:] = groups[:-1] + + +def setgroups(groups): + """Set active groups from a list of group ids.""" + max_groups = None + try: + max_groups = os.sysconf('SC_NGROUPS_MAX') + except Exception: + pass + try: + return _setgroups_hack(groups[:max_groups]) + except OSError as exc: + if exc.errno != errno.EPERM: + raise + if any(group not in groups for group in os.getgroups()): + # we shouldn't be allowed to change to this group. + raise + + +def initgroups(uid, gid): + """Compat version of :func:`os.initgroups` which was first + added to Python 2.7.""" + if not pwd: # pragma: no cover + return + username = pwd.getpwuid(uid)[0] + if hasattr(os, 'initgroups'): # Python 2.7+ + return os.initgroups(username, gid) + groups = [gr.gr_gid for gr in grp.getgrall() + if username in gr.gr_mem] + setgroups(groups) + + +def setgid(gid): + """Version of :func:`os.setgid` supporting group names.""" + os.setgid(parse_gid(gid)) + + +def setuid(uid): + """Version of :func:`os.setuid` supporting usernames.""" + os.setuid(parse_uid(uid)) + + +def maybe_drop_privileges(uid=None, gid=None): + """Change process privileges to new user/group. + + If UID and GID is specified, the real user/group is changed. + + If only UID is specified, the real user is changed, and the group is + changed to the users primary group. + + If only GID is specified, only the group is changed. + + """ + if sys.platform == 'win32': + return + if os.geteuid(): + # no point trying to setuid unless we're root. 
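+        # (the effective uid is non-zero here; if the *real* uid is still
+        # root the process is in an inconsistent state, so refuse to
+        # continue below.)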
+ if not os.getuid(): + raise AssertionError('contact support') + uid = uid and parse_uid(uid) + gid = gid and parse_gid(gid) + + if uid: + # If GID isn't defined, get the primary GID of the user. + if not gid and pwd: + gid = pwd.getpwuid(uid).pw_gid + # Must set the GID before initgroups(), as setgid() + # is known to zap the group list on some platforms. + + # setgid must happen before setuid (otherwise the setgid operation + # may fail because of insufficient privileges and possibly stay + # in a privileged group). + setgid(gid) + initgroups(uid, gid) + + # at last: + setuid(uid) + # ... and make sure privileges cannot be restored: + try: + setuid(0) + except OSError as exc: + if get_errno(exc) != errno.EPERM: + raise + pass # Good: cannot restore privileges. + else: + raise RuntimeError( + 'non-root user able to restore privileges after setuid.') + else: + gid and setgid(gid) + + if uid and (not os.getuid()) and not (os.geteuid()): + raise AssertionError('Still root uid after drop privileges!') + if gid and (not os.getgid()) and not (os.getegid()): + raise AssertionError('Still root gid after drop privileges!') + + +class Signals(object): + """Convenience interface to :mod:`signals`. + + If the requested signal is not supported on the current platform, + the operation will be ignored. + + **Examples**: + + .. code-block:: python + + >>> from celery.platforms import signals + + >>> from proj.handlers import my_handler + >>> signals['INT'] = my_handler + + >>> signals['INT'] + my_handler + + >>> signals.supported('INT') + True + + >>> signals.signum('INT') + 2 + + >>> signals.ignore('USR1') + >>> signals['USR1'] == signals.ignored + True + + >>> signals.reset('USR1') + >>> signals['USR1'] == signals.default + True + + >>> from proj.handlers import exit_handler, hup_handler + >>> signals.update(INT=exit_handler, + ... TERM=exit_handler, + ... HUP=hup_handler) + + """ + + ignored = _signal.SIG_IGN + default = _signal.SIG_DFL + + if hasattr(_signal, 'setitimer'): + + def arm_alarm(self, seconds): + _signal.setitimer(_signal.ITIMER_REAL, seconds) + else: # pragma: no cover + try: + from itimer import alarm as _itimer_alarm # noqa + except ImportError: + + def arm_alarm(self, seconds): # noqa + _signal.alarm(math.ceil(seconds)) + else: # pragma: no cover + + def arm_alarm(self, seconds): # noqa + return _itimer_alarm(seconds) # noqa + + def reset_alarm(self): + return _signal.alarm(0) + + def supported(self, signal_name): + """Return true value if ``signal_name`` exists on this platform.""" + try: + return self.signum(signal_name) + except AttributeError: + pass + + def signum(self, signal_name): + """Get signal number from signal name.""" + if isinstance(signal_name, numbers.Integral): + return signal_name + if not isinstance(signal_name, string_t) \ + or not signal_name.isupper(): + raise TypeError('signal name must be uppercase string.') + if not signal_name.startswith('SIG'): + signal_name = 'SIG' + signal_name + return getattr(_signal, signal_name) + + def reset(self, *signal_names): + """Reset signals to the default signal handler. + + Does nothing if the platform doesn't support signals, + or the specified signal in particular. + + """ + self.update((sig, self.default) for sig in signal_names) + + def ignore(self, *signal_names): + """Ignore signal using :const:`SIG_IGN`. + + Does nothing if the platform doesn't support signals, + or the specified signal in particular. 
+ + """ + self.update((sig, self.ignored) for sig in signal_names) + + def __getitem__(self, signal_name): + return _signal.getsignal(self.signum(signal_name)) + + def __setitem__(self, signal_name, handler): + """Install signal handler. + + Does nothing if the current platform doesn't support signals, + or the specified signal in particular. + + """ + try: + _signal.signal(self.signum(signal_name), handler) + except (AttributeError, ValueError): + pass + + def update(self, _d_=None, **sigmap): + """Set signal handlers from a mapping.""" + for signal_name, handler in items(dict(_d_ or {}, **sigmap)): + self[signal_name] = handler + +signals = Signals() +get_signal = signals.signum # compat +install_signal_handler = signals.__setitem__ # compat +reset_signal = signals.reset # compat +ignore_signal = signals.ignore # compat + + +def strargv(argv): + arg_start = 2 if 'manage' in argv[0] else 1 + if len(argv) > arg_start: + return ' '.join(argv[arg_start:]) + return '' + + +def set_process_title(progname, info=None): + """Set the ps name for the currently running process. + + Only works if :mod:`setproctitle` is installed. + + """ + proctitle = '[{0}]'.format(progname) + proctitle = '{0} {1}'.format(proctitle, info) if info else proctitle + if _setproctitle: + _setproctitle.setproctitle(safe_str(proctitle)) + return proctitle + + +if os.environ.get('NOSETPS'): # pragma: no cover + + def set_mp_process_title(*a, **k): + pass +else: + + def set_mp_process_title(progname, info=None, hostname=None): # noqa + """Set the ps name using the multiprocessing process name. + + Only works if :mod:`setproctitle` is installed. + + """ + if hostname: + progname = '{0}: {1}'.format(progname, hostname) + return set_process_title( + '{0}:{1}'.format(progname, current_process().name), info=info) + + +def get_errno_name(n): + """Get errno for string, e.g. ``ENOENT``.""" + if isinstance(n, string_t): + return getattr(errno, n) + return n + + +@contextmanager +def ignore_errno(*errnos, **kwargs): + """Context manager to ignore specific POSIX error codes. + + Takes a list of error codes to ignore, which can be either + the name of the code, or the code integer itself:: + + >>> with ignore_errno('ENOENT'): + ... with open('foo', 'r') as fh: + ... return fh.read() + + >>> with ignore_errno(errno.ENOENT, errno.EPERM): + ... pass + + :keyword types: A tuple of exceptions to ignore (when the errno matches), + defaults to :exc:`Exception`. 
+ """ + types = kwargs.get('types') or (Exception, ) + errnos = [get_errno_name(errno) for errno in errnos] + try: + yield + except types as exc: + if not hasattr(exc, 'errno'): + raise + if exc.errno not in errnos: + raise + + +def check_privileges(accept_content): + uid = os.getuid() if hasattr(os, 'getuid') else 65535 + gid = os.getgid() if hasattr(os, 'getgid') else 65535 + euid = os.geteuid() if hasattr(os, 'geteuid') else 65535 + egid = os.getegid() if hasattr(os, 'getegid') else 65535 + + if hasattr(os, 'fchown'): + if not all(hasattr(os, attr) + for attr in ['getuid', 'getgid', 'geteuid', 'getegid']): + raise AssertionError('suspicious platform, contact support') + + if not uid or not gid or not euid or not egid: + if ('pickle' in accept_content or + 'application/x-python-serialize' in accept_content): + if not C_FORCE_ROOT: + try: + print(ROOT_DISALLOWED.format( + uid=uid, euid=euid, gid=gid, egid=egid, + ), file=sys.stderr) + finally: + os._exit(1) + warnings.warn(RuntimeWarning(ROOT_DISCOURAGED.format( + uid=uid, euid=euid, gid=gid, egid=egid, + ))) diff --git a/celery/result.py b/celery/result.py new file mode 100644 index 0000000..a6c33e1 --- /dev/null +++ b/celery/result.py @@ -0,0 +1,917 @@ +# -*- coding: utf-8 -*- +""" + celery.result + ~~~~~~~~~~~~~ + + Task results/state and groups of results. + +""" +from __future__ import absolute_import + +import time +import warnings + +from collections import deque +from contextlib import contextmanager +from copy import copy + +from kombu.utils import cached_property +from kombu.utils.compat import OrderedDict + +from . import current_app +from . import states +from ._state import _set_task_join_will_block, task_join_will_block +from .app import app_or_default +from .datastructures import DependencyGraph, GraphFormatter +from .exceptions import IncompleteStream, TimeoutError +from .five import items, range, string_t, monotonic +from .utils import deprecated + +__all__ = ['ResultBase', 'AsyncResult', 'ResultSet', 'GroupResult', + 'EagerResult', 'result_from_tuple'] + +E_WOULDBLOCK = """\ +Never call result.get() within a task! +See http://docs.celeryq.org/en/latest/userguide/tasks.html\ +#task-synchronous-subtasks + +In Celery 3.2 this will result in an exception being +raised instead of just being a warning. +""" + + +def assert_will_not_block(): + if task_join_will_block(): + warnings.warn(RuntimeWarning(E_WOULDBLOCK)) + + +@contextmanager +def allow_join_result(): + reset_value = task_join_will_block() + _set_task_join_will_block(False) + try: + yield + finally: + _set_task_join_will_block(reset_value) + + +class ResultBase(object): + """Base class for all results""" + + #: Parent result (if part of a chain) + parent = None + + +class AsyncResult(ResultBase): + """Query task state. + + :param id: see :attr:`id`. + :keyword backend: see :attr:`backend`. + + """ + app = None + + #: Error raised for timeouts. + TimeoutError = TimeoutError + + #: The task's UUID. + id = None + + #: The task result backend to use. 
+ backend = None + + def __init__(self, id, backend=None, task_name=None, + app=None, parent=None): + self.app = app_or_default(app or self.app) + self.id = id + self.backend = backend or self.app.backend + self.task_name = task_name + self.parent = parent + self._cache = None + + def as_tuple(self): + parent = self.parent + return (self.id, parent and parent.as_tuple()), None + serializable = as_tuple # XXX compat + + def forget(self): + """Forget about (and possibly remove the result of) this task.""" + self._cache = None + self.backend.forget(self.id) + + def revoke(self, connection=None, terminate=False, signal=None, + wait=False, timeout=None): + """Send revoke signal to all workers. + + Any worker receiving the task, or having reserved the + task, *must* ignore it. + + :keyword terminate: Also terminate the process currently working + on the task (if any). + :keyword signal: Name of signal to send to process if terminate. + Default is TERM. + :keyword wait: Wait for replies from workers. Will wait for 1 second + by default or you can specify a custom ``timeout``. + :keyword timeout: Time in seconds to wait for replies if ``wait`` + enabled. + + """ + self.app.control.revoke(self.id, connection=connection, + terminate=terminate, signal=signal, + reply=wait, timeout=timeout) + + def get(self, timeout=None, propagate=True, interval=0.5, no_ack=True, + follow_parents=True): + """Wait until task is ready, and return its result. + + .. warning:: + + Waiting for tasks within a task may lead to deadlocks. + Please read :ref:`task-synchronous-subtasks`. + + :keyword timeout: How long to wait, in seconds, before the + operation times out. + :keyword propagate: Re-raise exception if the task failed. + :keyword interval: Time to wait (in seconds) before retrying to + retrieve the result. Note that this does not have any effect + when using the amqp result store backend, as it does not + use polling. + :keyword no_ack: Enable amqp no ack (automatically acknowledge + message). If this is :const:`False` then the message will + **not be acked**. + :keyword follow_parents: Reraise any exception raised by parent task. + + :raises celery.exceptions.TimeoutError: if `timeout` is not + :const:`None` and the result does not arrive within `timeout` + seconds. + + If the remote call raised an exception then that exception will + be re-raised. + + """ + assert_will_not_block() + on_interval = None + if follow_parents and propagate and self.parent: + on_interval = self._maybe_reraise_parent_error + on_interval() + + if self._cache: + if propagate: + self.maybe_reraise() + return self.result + + try: + return self.backend.wait_for( + self.id, timeout=timeout, + propagate=propagate, + interval=interval, + on_interval=on_interval, + no_ack=no_ack, + ) + finally: + self._get_task_meta() # update self._cache + wait = get # deprecated alias to :meth:`get`. + + def _maybe_reraise_parent_error(self): + for node in reversed(list(self._parents())): + node.maybe_reraise() + + def _parents(self): + node = self.parent + while node: + yield node + node = node.parent + + def collect(self, intermediate=False, **kwargs): + """Iterator, like :meth:`get` will wait for the task to complete, + but will also follow :class:`AsyncResult` and :class:`ResultSet` + returned by the task, yielding ``(result, value)`` tuples for each + result in the tree. + + An example would be having the following tasks: + + .. 
code-block:: python + + from celery import group + from proj.celery import app + + @app.task(trail=True) + def A(how_many): + return group(B.s(i) for i in range(how_many))() + + @app.task(trail=True) + def B(i): + return pow2.delay(i) + + @app.task(trail=True) + def pow2(i): + return i ** 2 + + Note that the ``trail`` option must be enabled + so that the list of children is stored in ``result.children``. + This is the default but enabled explicitly for illustration. + + Calling :meth:`collect` would return: + + .. code-block:: python + + >>> from celery.result import ResultBase + >>> from proj.tasks import A + + >>> result = A.delay(10) + >>> [v for v in result.collect() + ... if not isinstance(v, (ResultBase, tuple))] + [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] + + """ + for _, R in self.iterdeps(intermediate=intermediate): + yield R, R.get(**kwargs) + + def get_leaf(self): + value = None + for _, R in self.iterdeps(): + value = R.get() + return value + + def iterdeps(self, intermediate=False): + stack = deque([(None, self)]) + + while stack: + parent, node = stack.popleft() + yield parent, node + if node.ready(): + stack.extend((node, child) for child in node.children or []) + else: + if not intermediate: + raise IncompleteStream() + + def ready(self): + """Returns :const:`True` if the task has been executed. + + If the task is still running, pending, or is waiting + for retry then :const:`False` is returned. + + """ + return self.state in self.backend.READY_STATES + + def successful(self): + """Returns :const:`True` if the task executed successfully.""" + return self.state == states.SUCCESS + + def failed(self): + """Returns :const:`True` if the task failed.""" + return self.state == states.FAILURE + + def maybe_reraise(self): + if self.state in states.PROPAGATE_STATES: + raise self.result + + def build_graph(self, intermediate=False, formatter=None): + graph = DependencyGraph( + formatter=formatter or GraphFormatter(root=self.id, shape='oval'), + ) + for parent, node in self.iterdeps(intermediate=intermediate): + graph.add_arc(node) + if parent: + graph.add_edge(parent, node) + return graph + + def __str__(self): + """`str(self) -> self.id`""" + return str(self.id) + + def __hash__(self): + """`hash(self) -> hash(self.id)`""" + return hash(self.id) + + def __repr__(self): + return '<{0}: {1}>'.format(type(self).__name__, self.id) + + def __eq__(self, other): + if isinstance(other, AsyncResult): + return other.id == self.id + elif isinstance(other, string_t): + return other == self.id + return NotImplemented + + def __ne__(self, other): + return not self.__eq__(other) + + def __copy__(self): + return self.__class__( + self.id, self.backend, self.task_name, self.app, self.parent, + ) + + def __reduce__(self): + return self.__class__, self.__reduce_args__() + + def __reduce_args__(self): + return self.id, self.backend, self.task_name, None, self.parent + + def __del__(self): + self._cache = None + + @cached_property + def graph(self): + return self.build_graph() + + @property + def supports_native_join(self): + return self.backend.supports_native_join + + @property + def children(self): + return self._get_task_meta().get('children') + + def _get_task_meta(self): + if self._cache is None: + meta = self.backend.get_task_meta(self.id) + if meta: + state = meta['status'] + if state == states.SUCCESS or state in states.PROPAGATE_STATES: + return self._set_cache(meta) + return meta + return self._cache + + def _set_cache(self, d): + state, children = d['status'], d.get('children') + if state in 
states.EXCEPTION_STATES: + d['result'] = self.backend.exception_to_python(d['result']) + if children: + d['children'] = [ + result_from_tuple(child, self.app) for child in children + ] + self._cache = d + return d + + @property + def result(self): + """When the task has been executed, this contains the return value. + If the task raised an exception, this will be the exception + instance.""" + return self._get_task_meta()['result'] + info = result + + @property + def traceback(self): + """Get the traceback of a failed task.""" + return self._get_task_meta().get('traceback') + + @property + def state(self): + """The tasks current state. + + Possible values includes: + + *PENDING* + + The task is waiting for execution. + + *STARTED* + + The task has been started. + + *RETRY* + + The task is to be retried, possibly because of failure. + + *FAILURE* + + The task raised an exception, or has exceeded the retry limit. + The :attr:`result` attribute then contains the + exception raised by the task. + + *SUCCESS* + + The task executed successfully. The :attr:`result` attribute + then contains the tasks return value. + + """ + return self._get_task_meta()['status'] + status = state + + @property + def task_id(self): + """compat alias to :attr:`id`""" + return self.id + + @task_id.setter # noqa + def task_id(self, id): + self.id = id +BaseAsyncResult = AsyncResult # for backwards compatibility. + + +class ResultSet(ResultBase): + """Working with more than one result. + + :param results: List of result instances. + + """ + app = None + + #: List of results in in the set. + results = None + + def __init__(self, results, app=None, **kwargs): + self.app = app_or_default(app or self.app) + self.results = results + + def add(self, result): + """Add :class:`AsyncResult` as a new member of the set. + + Does nothing if the result is already a member. + + """ + if result not in self.results: + self.results.append(result) + + def remove(self, result): + """Remove result from the set; it must be a member. + + :raises KeyError: if the result is not a member. + + """ + if isinstance(result, string_t): + result = self.app.AsyncResult(result) + try: + self.results.remove(result) + except ValueError: + raise KeyError(result) + + def discard(self, result): + """Remove result from the set if it is a member. + + If it is not a member, do nothing. + + """ + try: + self.remove(result) + except KeyError: + pass + + def update(self, results): + """Update set with the union of itself and an iterable with + results.""" + self.results.extend(r for r in results if r not in self.results) + + def clear(self): + """Remove all results from this set.""" + self.results[:] = [] # don't create new list. + + def successful(self): + """Was all of the tasks successful? + + :returns: :const:`True` if all of the tasks finished + successfully (i.e. did not raise an exception). + + """ + return all(result.successful() for result in self.results) + + def failed(self): + """Did any of the tasks fail? + + :returns: :const:`True` if one of the tasks failed. + (i.e., raised an exception) + + """ + return any(result.failed() for result in self.results) + + def maybe_reraise(self): + for result in self.results: + result.maybe_reraise() + + def waiting(self): + """Are any of the tasks incomplete? + + :returns: :const:`True` if one of the tasks are still + waiting for execution. + + """ + return any(not result.ready() for result in self.results) + + def ready(self): + """Did all of the tasks complete? (either by success of failure). 
+ + :returns: :const:`True` if all of the tasks has been + executed. + + """ + return all(result.ready() for result in self.results) + + def completed_count(self): + """Task completion count. + + :returns: the number of tasks completed. + + """ + return sum(int(result.successful()) for result in self.results) + + def forget(self): + """Forget about (and possible remove the result of) all the tasks.""" + for result in self.results: + result.forget() + + def revoke(self, connection=None, terminate=False, signal=None, + wait=False, timeout=None): + """Send revoke signal to all workers for all tasks in the set. + + :keyword terminate: Also terminate the process currently working + on the task (if any). + :keyword signal: Name of signal to send to process if terminate. + Default is TERM. + :keyword wait: Wait for replies from worker. Will wait for 1 second + by default or you can specify a custom ``timeout``. + :keyword timeout: Time in seconds to wait for replies if ``wait`` + enabled. + + """ + self.app.control.revoke([r.id for r in self.results], + connection=connection, timeout=timeout, + terminate=terminate, signal=signal, reply=wait) + + def __iter__(self): + return iter(self.results) + + def __getitem__(self, index): + """`res[i] -> res.results[i]`""" + return self.results[index] + + @deprecated('3.2', '3.3') + def iterate(self, timeout=None, propagate=True, interval=0.5): + """Deprecated method, use :meth:`get` with a callback argument.""" + elapsed = 0.0 + results = OrderedDict((result.id, copy(result)) + for result in self.results) + + while results: + removed = set() + for task_id, result in items(results): + if result.ready(): + yield result.get(timeout=timeout and timeout - elapsed, + propagate=propagate) + removed.add(task_id) + else: + if result.backend.subpolling_interval: + time.sleep(result.backend.subpolling_interval) + for task_id in removed: + results.pop(task_id, None) + time.sleep(interval) + elapsed += interval + if timeout and elapsed >= timeout: + raise TimeoutError('The operation timed out') + + def get(self, timeout=None, propagate=True, interval=0.5, + callback=None, no_ack=True): + """See :meth:`join` + + This is here for API compatibility with :class:`AsyncResult`, + in addition it uses :meth:`join_native` if available for the + current result backend. + + """ + return (self.join_native if self.supports_native_join else self.join)( + timeout=timeout, propagate=propagate, + interval=interval, callback=callback, no_ack=no_ack) + + def join(self, timeout=None, propagate=True, interval=0.5, + callback=None, no_ack=True): + """Gathers the results of all tasks as a list in order. + + .. note:: + + This can be an expensive operation for result store + backends that must resort to polling (e.g. database). + + You should consider using :meth:`join_native` if your backend + supports it. + + .. warning:: + + Waiting for tasks within a task may lead to deadlocks. + Please see :ref:`task-synchronous-subtasks`. + + :keyword timeout: The number of seconds to wait for results before + the operation times out. + + :keyword propagate: If any of the tasks raises an exception, the + exception will be re-raised. + + :keyword interval: Time to wait (in seconds) before retrying to + retrieve a result from the set. Note that this + does not have any effect when using the amqp + result store backend, as it does not use polling. + + :keyword callback: Optional callback to be called for every result + received. 
Must have signature ``(task_id, value)`` + No results will be returned by this function if + a callback is specified. The order of results + is also arbitrary when a callback is used. + To get access to the result object for a particular + id you will have to generate an index first: + ``index = {r.id: r for r in gres.results.values()}`` + Or you can create new result objects on the fly: + ``result = app.AsyncResult(task_id)`` (both will + take advantage of the backend cache anyway). + + :keyword no_ack: Automatic message acknowledgement (Note that if this + is set to :const:`False` then the messages *will not be + acknowledged*). + + :raises celery.exceptions.TimeoutError: if ``timeout`` is not + :const:`None` and the operation takes longer than ``timeout`` + seconds. + + """ + assert_will_not_block() + time_start = monotonic() + remaining = None + + results = [] + for result in self.results: + remaining = None + if timeout: + remaining = timeout - (monotonic() - time_start) + if remaining <= 0.0: + raise TimeoutError('join operation timed out') + value = result.get( + timeout=remaining, propagate=propagate, + interval=interval, no_ack=no_ack, + ) + if callback: + callback(result.id, value) + else: + results.append(value) + return results + + def iter_native(self, timeout=None, interval=0.5, no_ack=True): + """Backend optimized version of :meth:`iterate`. + + .. versionadded:: 2.2 + + Note that this does not support collecting the results + for different task types using different backends. + + This is currently only supported by the amqp, Redis and cache + result backends. + + """ + results = self.results + if not results: + return iter([]) + return self.backend.get_many( + set(r.id for r in results), + timeout=timeout, interval=interval, no_ack=no_ack, + ) + + def join_native(self, timeout=None, propagate=True, + interval=0.5, callback=None, no_ack=True): + """Backend optimized version of :meth:`join`. + + .. versionadded:: 2.2 + + Note that this does not support collecting the results + for different task types using different backends. + + This is currently only supported by the amqp, Redis and cache + result backends. 
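A hedged usage sketch of :meth:`join` and the callback form described above, reusing the hypothetical ``add`` task from the earlier sketch:

.. code-block:: python

    from celery.result import ResultSet

    rs = ResultSet([add.delay(i, i) for i in range(10)])

    values = rs.join(timeout=30)      # values in submission order

    collected = {}

    def on_result(task_id, value):    # callback signature: (task_id, value)
        collected[task_id] = value

    rs.join(callback=on_result)       # no values collected in the return value;
                                      # delivery order is arbitrary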
+ + """ + assert_will_not_block() + order_index = None if callback else dict( + (result.id, i) for i, result in enumerate(self.results) + ) + acc = None if callback else [None for _ in range(len(self))] + for task_id, meta in self.iter_native(timeout, interval, no_ack): + value = meta['result'] + if propagate and meta['status'] in states.PROPAGATE_STATES: + raise value + if callback: + callback(task_id, value) + else: + acc[order_index[task_id]] = value + return acc + + def _failed_join_report(self): + return (res for res in self.results + if res.backend.is_cached(res.id) and + res.state in states.PROPAGATE_STATES) + + def __len__(self): + return len(self.results) + + def __eq__(self, other): + if isinstance(other, ResultSet): + return other.results == self.results + return NotImplemented + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return '<{0}: [{1}]>'.format(type(self).__name__, + ', '.join(r.id for r in self.results)) + + @property + def subtasks(self): + """Deprecated alias to :attr:`results`.""" + return self.results + + @property + def supports_native_join(self): + try: + return self.results[0].supports_native_join + except IndexError: + pass + + @property + def backend(self): + return self.app.backend if self.app else self.results[0].backend + + +class GroupResult(ResultSet): + """Like :class:`ResultSet`, but with an associated id. + + This type is returned by :class:`~celery.group`, and the + deprecated TaskSet, meth:`~celery.task.TaskSet.apply_async` method. + + It enables inspection of the tasks state and return values as + a single entity. + + :param id: The id of the group. + :param results: List of result instances. + + """ + + #: The UUID of the group. + id = None + + #: List/iterator of results in the group + results = None + + def __init__(self, id=None, results=None, **kwargs): + self.id = id + ResultSet.__init__(self, results, **kwargs) + + def save(self, backend=None): + """Save group-result for later retrieval using :meth:`restore`. + + Example:: + + >>> def save_and_restore(result): + ... result.save() + ... result = GroupResult.restore(result.id) + + """ + return (backend or self.app.backend).save_group(self.id, self) + + def delete(self, backend=None): + """Remove this result if it was previously saved.""" + (backend or self.app.backend).delete_group(self.id) + + def __reduce__(self): + return self.__class__, self.__reduce_args__() + + def __reduce_args__(self): + return self.id, self.results + + def __eq__(self, other): + if isinstance(other, GroupResult): + return other.id == self.id and other.results == self.results + return NotImplemented + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return '<{0}: {1} [{2}]>'.format(type(self).__name__, self.id, + ', '.join(r.id for r in self.results)) + + def as_tuple(self): + return self.id, [r.as_tuple() for r in self.results] + serializable = as_tuple # XXX compat + + @property + def children(self): + return self.results + + @classmethod + def restore(self, id, backend=None): + """Restore previously saved group result.""" + return ( + backend or (self.app.backend if self.app else current_app.backend) + ).restore_group(id) + + +class TaskSetResult(GroupResult): + """Deprecated version of :class:`GroupResult`""" + + def __init__(self, taskset_id, results=None, **kwargs): + # XXX supports the taskset_id kwarg. + # XXX previously the "results" arg was named "subtasks". 
+ if 'subtasks' in kwargs: + results = kwargs['subtasks'] + GroupResult.__init__(self, taskset_id, results, **kwargs) + + def itersubtasks(self): + """Deprecated. Use ``iter(self.results)`` instead.""" + return iter(self.results) + + @property + def total(self): + """Deprecated: Use ``len(r)``.""" + return len(self) + + @property + def taskset_id(self): + """compat alias to :attr:`self.id`""" + return self.id + + @taskset_id.setter # noqa + def taskset_id(self, id): + self.id = id + + +class EagerResult(AsyncResult): + """Result that we know has already been executed.""" + task_name = None + + def __init__(self, id, ret_value, state, traceback=None): + self.id = id + self._result = ret_value + self._state = state + self._traceback = traceback + + def _get_task_meta(self): + return {'task_id': self.id, 'result': self._result, 'status': + self._state, 'traceback': self._traceback} + + def __reduce__(self): + return self.__class__, self.__reduce_args__() + + def __reduce_args__(self): + return (self.id, self._result, self._state, self._traceback) + + def __copy__(self): + cls, args = self.__reduce__() + return cls(*args) + + def ready(self): + return True + + def get(self, timeout=None, propagate=True, **kwargs): + if self.successful(): + return self.result + elif self.state in states.PROPAGATE_STATES: + if propagate: + raise self.result + return self.result + wait = get + + def forget(self): + pass + + def revoke(self, *args, **kwargs): + self._state = states.REVOKED + + def __repr__(self): + return ''.format(self) + + @property + def result(self): + """The tasks return value""" + return self._result + + @property + def state(self): + """The tasks state.""" + return self._state + status = state + + @property + def traceback(self): + """The traceback if the task failed.""" + return self._traceback + + @property + def supports_native_join(self): + return False + + +def result_from_tuple(r, app=None): + # earlier backends may just pickle, so check if + # result is already prepared. + app = app_or_default(app) + Result = app.AsyncResult + if not isinstance(r, ResultBase): + res, nodes = r + if nodes: + return app.GroupResult( + res, [result_from_tuple(child, app) for child in nodes], + ) + # previously did not include parent + id, parent = res if isinstance(res, (list, tuple)) else (res, None) + if parent: + parent = result_from_tuple(parent, app) + return Result(id, parent=parent) + return r +from_serializable = result_from_tuple # XXX compat diff --git a/celery/schedules.py b/celery/schedules.py new file mode 100644 index 0000000..6424dfa --- /dev/null +++ b/celery/schedules.py @@ -0,0 +1,593 @@ +# -*- coding: utf-8 -*- +""" + celery.schedules + ~~~~~~~~~~~~~~~~ + + Schedules define the intervals at which periodic tasks + should run. + +""" +from __future__ import absolute_import + +import numbers +import re + +from collections import namedtuple +from datetime import datetime, timedelta + +from kombu.utils import cached_property + +from . import current_app +from .five import range, string_t +from .utils import is_iterable +from .utils.timeutils import ( + timedelta_seconds, weekday, maybe_timedelta, remaining, + humanize_seconds, timezone, maybe_make_aware, ffwd +) +from .datastructures import AttributeDict + +__all__ = ['ParseException', 'schedule', 'crontab', 'crontab_parser', + 'maybe_schedule'] + +schedstate = namedtuple('schedstate', ('is_due', 'next')) + + +CRON_PATTERN_INVALID = """\ +Invalid crontab pattern. Valid range is {min}-{max}. 
\ +'{value}' was found.\ +""" + +CRON_INVALID_TYPE = """\ +Argument cronspec needs to be of any of the following types: \ +int, str, or an iterable type. {type!r} was given.\ +""" + +CRON_REPR = """\ +\ +""" + + +def cronfield(s): + return '*' if s is None else s + + +class ParseException(Exception): + """Raised by crontab_parser when the input can't be parsed.""" + + +class schedule(object): + """Schedule for periodic task. + + :param run_every: Interval in seconds (or a :class:`~datetime.timedelta`). + :param relative: If set to True the run time will be rounded to the + resolution of the interval. + :param nowfun: Function returning the current date and time + (class:`~datetime.datetime`). + :param app: Celery app instance. + + """ + relative = False + + def __init__(self, run_every=None, relative=False, nowfun=None, app=None): + self.run_every = maybe_timedelta(run_every) + self.relative = relative + self.nowfun = nowfun + self._app = app + + def now(self): + return (self.nowfun or self.app.now)() + + def remaining_estimate(self, last_run_at): + return remaining( + self.maybe_make_aware(last_run_at), self.run_every, + self.maybe_make_aware(self.now()), self.relative, + ) + + def is_due(self, last_run_at): + """Returns tuple of two items `(is_due, next_time_to_check)`, + where next time to check is in seconds. + + e.g. + + * `(True, 20)`, means the task should be run now, and the next + time to check is in 20 seconds. + + * `(False, 12.3)`, means the task is not due, but that the scheduler + should check again in 12.3 seconds. + + The next time to check is used to save energy/cpu cycles, + it does not need to be accurate but will influence the precision + of your schedule. You must also keep in mind + the value of :setting:`CELERYBEAT_MAX_LOOP_INTERVAL`, + which decides the maximum number of seconds the scheduler can + sleep between re-checking the periodic task intervals. So if you + have a task that changes schedule at runtime then your next_run_at + check will decide how long it will take before a change to the + schedule takes effect. The max loop interval takes precendence + over the next check at value returned. + + .. admonition:: Scheduler max interval variance + + The default max loop interval may vary for different schedulers. + For the default scheduler the value is 5 minutes, but for e.g. + the django-celery database scheduler the value is 5 seconds. 
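For illustration, a minimal :setting:`CELERYBEAT_SCHEDULE` entry built on this interval-based ``schedule`` class; the task name is a placeholder:

.. code-block:: python

    from datetime import timedelta
    from celery.schedules import schedule

    CELERYBEAT_SCHEDULE = {
        'cleanup-every-five-minutes': {
            'task': 'proj.tasks.cleanup',     # hypothetical task
            'schedule': schedule(run_every=timedelta(minutes=5)),
        },
    }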
+ + """ + last_run_at = self.maybe_make_aware(last_run_at) + rem_delta = self.remaining_estimate(last_run_at) + remaining_s = timedelta_seconds(rem_delta) + if remaining_s == 0: + return schedstate(is_due=True, next=self.seconds) + return schedstate(is_due=False, next=remaining_s) + + def maybe_make_aware(self, dt): + if self.utc_enabled: + return maybe_make_aware(dt, self.tz) + return dt + + def __repr__(self): + return ''.format(self) + + def __eq__(self, other): + if isinstance(other, schedule): + return self.run_every == other.run_every + return self.run_every == other + + def __ne__(self, other): + return not self.__eq__(other) + + def __reduce__(self): + return self.__class__, (self.run_every, self.relative, self.nowfun) + + @property + def seconds(self): + return timedelta_seconds(self.run_every) + + @property + def human_seconds(self): + return humanize_seconds(self.seconds) + + @property + def app(self): + return self._app or current_app._get_current_object() + + @app.setter # noqa + def app(self, app): + self._app = app + + @cached_property + def tz(self): + return self.app.timezone + + @cached_property + def utc_enabled(self): + return self.app.conf.CELERY_ENABLE_UTC + + def to_local(self, dt): + if not self.utc_enabled: + return timezone.to_local_fallback(dt) + return dt + + +class crontab_parser(object): + """Parser for crontab expressions. Any expression of the form 'groups' + (see BNF grammar below) is accepted and expanded to a set of numbers. + These numbers represent the units of time that the crontab needs to + run on:: + + digit :: '0'..'9' + dow :: 'a'..'z' + number :: digit+ | dow+ + steps :: number + range :: number ( '-' number ) ? + numspec :: '*' | range + expr :: numspec ( '/' steps ) ? + groups :: expr ( ',' expr ) * + + The parser is a general purpose one, useful for parsing hours, minutes and + day_of_week expressions. Example usage:: + + >>> minutes = crontab_parser(60).parse('*/15') + [0, 15, 30, 45] + >>> hours = crontab_parser(24).parse('*/4') + [0, 4, 8, 12, 16, 20] + >>> day_of_week = crontab_parser(7).parse('*') + [0, 1, 2, 3, 4, 5, 6] + + It can also parse day_of_month and month_of_year expressions if initialized + with an minimum of 1. Example usage:: + + >>> days_of_month = crontab_parser(31, 1).parse('*/3') + [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31] + >>> months_of_year = crontab_parser(12, 1).parse('*/2') + [1, 3, 5, 7, 9, 11] + >>> months_of_year = crontab_parser(12, 1).parse('2-12/2') + [2, 4, 6, 8, 10, 12] + + The maximum possible expanded value returned is found by the formula:: + + max_ + min_ - 1 + + """ + ParseException = ParseException + + _range = r'(\w+?)-(\w+)' + _steps = r'/(\w+)?' 
+ _star = r'\*' + + def __init__(self, max_=60, min_=0): + self.max_ = max_ + self.min_ = min_ + self.pats = ( + (re.compile(self._range + self._steps), self._range_steps), + (re.compile(self._range), self._expand_range), + (re.compile(self._star + self._steps), self._star_steps), + (re.compile('^' + self._star + '$'), self._expand_star), + ) + + def parse(self, spec): + acc = set() + for part in spec.split(','): + if not part: + raise self.ParseException('empty part') + acc |= set(self._parse_part(part)) + return acc + + def _parse_part(self, part): + for regex, handler in self.pats: + m = regex.match(part) + if m: + return handler(m.groups()) + return self._expand_range((part, )) + + def _expand_range(self, toks): + fr = self._expand_number(toks[0]) + if len(toks) > 1: + to = self._expand_number(toks[1]) + if to < fr: # Wrap around max_ if necessary + return (list(range(fr, self.min_ + self.max_)) + + list(range(self.min_, to + 1))) + return list(range(fr, to + 1)) + return [fr] + + def _range_steps(self, toks): + if len(toks) != 3 or not toks[2]: + raise self.ParseException('empty filter') + return self._expand_range(toks[:2])[::int(toks[2])] + + def _star_steps(self, toks): + if not toks or not toks[0]: + raise self.ParseException('empty filter') + return self._expand_star()[::int(toks[0])] + + def _expand_star(self, *args): + return list(range(self.min_, self.max_ + self.min_)) + + def _expand_number(self, s): + if isinstance(s, string_t) and s[0] == '-': + raise self.ParseException('negative numbers not supported') + try: + i = int(s) + except ValueError: + try: + i = weekday(s) + except KeyError: + raise ValueError('Invalid weekday literal {0!r}.'.format(s)) + + max_val = self.min_ + self.max_ - 1 + if i > max_val: + raise ValueError( + 'Invalid end range: {0} > {1}.'.format(i, max_val)) + if i < self.min_: + raise ValueError( + 'Invalid beginning range: {0} < {1}.'.format(i, self.min_)) + + return i + + +class crontab(schedule): + """A crontab can be used as the `run_every` value of a + :class:`PeriodicTask` to add cron-like scheduling. + + Like a :manpage:`cron` job, you can specify units of time of when + you would like the task to execute. It is a reasonably complete + implementation of cron's features, so it should provide a fair + degree of scheduling needs. + + You can specify a minute, an hour, a day of the week, a day of the + month, and/or a month in the year in any of the following formats: + + .. attribute:: minute + + - A (list of) integers from 0-59 that represent the minutes of + an hour of when execution should occur; or + - A string representing a crontab pattern. This may get pretty + advanced, like `minute='*/15'` (for every quarter) or + `minute='1,13,30-45,50-59/2'`. + + .. attribute:: hour + + - A (list of) integers from 0-23 that represent the hours of + a day of when execution should occur; or + - A string representing a crontab pattern. This may get pretty + advanced, like `hour='*/3'` (for every three hours) or + `hour='0,8-17/2'` (at midnight, and every two hours during + office hours). + + .. attribute:: day_of_week + + - A (list of) integers from 0-6, where Sunday = 0 and Saturday = + 6, that represent the days of a week that execution should + occur. + - A string representing a crontab pattern. This may get pretty + advanced, like `day_of_week='mon-fri'` (for weekdays only). + (Beware that `day_of_week='*/2'` does not literally mean + 'every two days', but 'every day that is divisible by two'!) + + .. 
attribute:: day_of_month + + - A (list of) integers from 1-31 that represents the days of the + month that execution should occur. + - A string representing a crontab pattern. This may get pretty + advanced, such as `day_of_month='2-30/3'` (for every even + numbered day) or `day_of_month='1-7,15-21'` (for the first and + third weeks of the month). + + .. attribute:: month_of_year + + - A (list of) integers from 1-12 that represents the months of + the year during which execution can occur. + - A string representing a crontab pattern. This may get pretty + advanced, such as `month_of_year='*/3'` (for the first month + of every quarter) or `month_of_year='2-12/2'` (for every even + numbered month). + + .. attribute:: nowfun + + Function returning the current date and time + (:class:`~datetime.datetime`). + + .. attribute:: app + + The Celery app instance. + + It is important to realize that any day on which execution should + occur must be represented by entries in all three of the day and + month attributes. For example, if `day_of_week` is 0 and `day_of_month` + is every seventh day, only months that begin on Sunday and are also + in the `month_of_year` attribute will have execution events. Or, + `day_of_week` is 1 and `day_of_month` is '1-7,15-21' means every + first and third monday of every month present in `month_of_year`. + + """ + + def __init__(self, minute='*', hour='*', day_of_week='*', + day_of_month='*', month_of_year='*', nowfun=None, app=None): + self._orig_minute = cronfield(minute) + self._orig_hour = cronfield(hour) + self._orig_day_of_week = cronfield(day_of_week) + self._orig_day_of_month = cronfield(day_of_month) + self._orig_month_of_year = cronfield(month_of_year) + self.hour = self._expand_cronspec(hour, 24) + self.minute = self._expand_cronspec(minute, 60) + self.day_of_week = self._expand_cronspec(day_of_week, 7) + self.day_of_month = self._expand_cronspec(day_of_month, 31, 1) + self.month_of_year = self._expand_cronspec(month_of_year, 12, 1) + self.nowfun = nowfun + self._app = app + + @staticmethod + def _expand_cronspec(cronspec, max_, min_=0): + """Takes the given cronspec argument in one of the forms:: + + int (like 7) + str (like '3-5,*/15', '*', or 'monday') + set (like set([0,15,30,45])) + list (like [8-17]) + + And convert it to an (expanded) set representing all time unit + values on which the crontab triggers. Only in case of the base + type being 'str', parsing occurs. (It is fast and + happens only once for each crontab instance, so there is no + significant performance overhead involved.) + + For the other base types, merely Python type conversions happen. + + The argument `max_` is needed to determine the expansion of '*' + and ranges. + The argument `min_` is needed to determine the expansion of '*' + and ranges for 1-based cronspecs, such as day of month or month + of year. The default is sufficient for minute, hour, and day of + week. 
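A few hedged examples that match the attribute descriptions above:

.. code-block:: python

    from celery.schedules import crontab

    crontab(minute=30, hour=7)                        # daily at 07:30
    crontab(minute=0, hour='*/3')                     # every three hours, on the hour
    crontab(minute=0, hour=0, day_of_week='mon-fri')  # weekday midnights
    crontab(minute=0, hour=2,
            day_of_month=1, month_of_year='*/3')      # 02:00, first day of each quarter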
+ + """ + if isinstance(cronspec, numbers.Integral): + result = set([cronspec]) + elif isinstance(cronspec, string_t): + result = crontab_parser(max_, min_).parse(cronspec) + elif isinstance(cronspec, set): + result = cronspec + elif is_iterable(cronspec): + result = set(cronspec) + else: + raise TypeError(CRON_INVALID_TYPE.format(type=type(cronspec))) + + # assure the result does not preceed the min or exceed the max + for number in result: + if number >= max_ + min_ or number < min_: + raise ValueError(CRON_PATTERN_INVALID.format( + min=min_, max=max_ - 1 + min_, value=number)) + return result + + def _delta_to_next(self, last_run_at, next_hour, next_minute): + """ + Takes a datetime of last run, next minute and hour, and + returns a relativedelta for the next scheduled day and time. + Only called when day_of_month and/or month_of_year cronspec + is specified to further limit scheduled task execution. + """ + from bisect import bisect, bisect_left + + datedata = AttributeDict(year=last_run_at.year) + days_of_month = sorted(self.day_of_month) + months_of_year = sorted(self.month_of_year) + + def day_out_of_range(year, month, day): + try: + datetime(year=year, month=month, day=day) + except ValueError: + return True + return False + + def roll_over(): + while 1: + flag = (datedata.dom == len(days_of_month) or + day_out_of_range(datedata.year, + months_of_year[datedata.moy], + days_of_month[datedata.dom]) or + (self.maybe_make_aware(datetime(datedata.year, + months_of_year[datedata.moy], + days_of_month[datedata.dom])) < last_run_at)) + + if flag: + datedata.dom = 0 + datedata.moy += 1 + if datedata.moy == len(months_of_year): + datedata.moy = 0 + datedata.year += 1 + else: + break + + if last_run_at.month in self.month_of_year: + datedata.dom = bisect(days_of_month, last_run_at.day) + datedata.moy = bisect_left(months_of_year, last_run_at.month) + else: + datedata.dom = 0 + datedata.moy = bisect(months_of_year, last_run_at.month) + if datedata.moy == len(months_of_year): + datedata.moy = 0 + roll_over() + + while 1: + th = datetime(year=datedata.year, + month=months_of_year[datedata.moy], + day=days_of_month[datedata.dom]) + if th.isoweekday() % 7 in self.day_of_week: + break + datedata.dom += 1 + roll_over() + + return ffwd(year=datedata.year, + month=months_of_year[datedata.moy], + day=days_of_month[datedata.dom], + hour=next_hour, + minute=next_minute, + second=0, + microsecond=0) + + def now(self): + return (self.nowfun or self.app.now)() + + def __repr__(self): + return CRON_REPR.format(self) + + def __reduce__(self): + return (self.__class__, (self._orig_minute, + self._orig_hour, + self._orig_day_of_week, + self._orig_day_of_month, + self._orig_month_of_year), None) + + def remaining_delta(self, last_run_at, tz=None, ffwd=ffwd): + tz = tz or self.tz + last_run_at = self.maybe_make_aware(last_run_at) + now = self.maybe_make_aware(self.now()) + dow_num = last_run_at.isoweekday() % 7 # Sunday is day 0, not day 7 + + execute_this_date = (last_run_at.month in self.month_of_year and + last_run_at.day in self.day_of_month and + dow_num in self.day_of_week) + + execute_this_hour = (execute_this_date and + last_run_at.day == now.day and + last_run_at.month == now.month and + last_run_at.year == now.year and + last_run_at.hour in self.hour and + last_run_at.minute < max(self.minute)) + + if execute_this_hour: + next_minute = min(minute for minute in self.minute + if minute > last_run_at.minute) + delta = ffwd(minute=next_minute, second=0, microsecond=0) + else: + next_minute = 
min(self.minute) + execute_today = (execute_this_date and + last_run_at.hour < max(self.hour)) + + if execute_today: + next_hour = min(hour for hour in self.hour + if hour > last_run_at.hour) + delta = ffwd(hour=next_hour, minute=next_minute, + second=0, microsecond=0) + else: + next_hour = min(self.hour) + all_dom_moy = (self._orig_day_of_month == '*' and + self._orig_month_of_year == '*') + if all_dom_moy: + next_day = min([day for day in self.day_of_week + if day > dow_num] or self.day_of_week) + add_week = next_day == dow_num + + delta = ffwd(weeks=add_week and 1 or 0, + weekday=(next_day - 1) % 7, + hour=next_hour, + minute=next_minute, + second=0, + microsecond=0) + else: + delta = self._delta_to_next(last_run_at, + next_hour, next_minute) + return self.to_local(last_run_at), delta, self.to_local(now) + + def remaining_estimate(self, last_run_at, ffwd=ffwd): + """Returns when the periodic task should run next as a timedelta.""" + return remaining(*self.remaining_delta(last_run_at, ffwd=ffwd)) + + def is_due(self, last_run_at): + """Returns tuple of two items `(is_due, next_time_to_run)`, + where next time to run is in seconds. + + See :meth:`celery.schedules.schedule.is_due` for more information. + + """ + rem_delta = self.remaining_estimate(last_run_at) + rem = timedelta_seconds(rem_delta) + due = rem == 0 + if due: + rem_delta = self.remaining_estimate(self.now()) + rem = timedelta_seconds(rem_delta) + return schedstate(due, rem) + + def __eq__(self, other): + if isinstance(other, crontab): + return (other.month_of_year == self.month_of_year and + other.day_of_month == self.day_of_month and + other.day_of_week == self.day_of_week and + other.hour == self.hour and + other.minute == self.minute) + return NotImplemented + + def __ne__(self, other): + return not self.__eq__(other) + + +def maybe_schedule(s, relative=False, app=None): + if s is not None: + if isinstance(s, numbers.Integral): + s = timedelta(seconds=s) + if isinstance(s, timedelta): + return schedule(s, relative, app=app) + else: + s.app = app + return s diff --git a/celery/security/__init__.py b/celery/security/__init__.py new file mode 100644 index 0000000..352d400 --- /dev/null +++ b/celery/security/__init__.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +""" + celery.security + ~~~~~~~~~~~~~~~ + + Module implementing the signing message serializer. + +""" +from __future__ import absolute_import + +from kombu.serialization import ( + registry, disable_insecure_serializers as _disable_insecure_serializers, +) + +from celery.exceptions import ImproperlyConfigured + +from .serialization import register_auth + +SSL_NOT_INSTALLED = """\ +You need to install the pyOpenSSL library to use the auth serializer. +Please install by: + + $ pip install pyOpenSSL +""" + +SETTING_MISSING = """\ +Sorry, but you have to configure the + * CELERY_SECURITY_KEY + * CELERY_SECURITY_CERTIFICATE, and the + * CELERY_SECURITY_CERT_STORE +configuration settings to use the auth serializer. + +Please see the configuration reference for more information. 
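A hedged configuration sketch for enabling the ``auth`` serializer described above; the key, certificate and cert-store paths are placeholders, and pyOpenSSL must be installed:

.. code-block:: python

    from celery import Celery

    app = Celery('proj', broker='amqp://')
    app.conf.update(
        CELERY_TASK_SERIALIZER='auth',
        CELERY_SECURITY_KEY='/etc/ssl/private/worker.key',
        CELERY_SECURITY_CERTIFICATE='/etc/ssl/certs/worker.pem',
        CELERY_SECURITY_CERT_STORE='/etc/ssl/certs/*.pem',
    )
    app.setup_security()   # registers 'auth' and makes it the default serializer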
+""" + +__all__ = ['setup_security'] + + +def setup_security(allowed_serializers=None, key=None, cert=None, store=None, + digest='sha1', serializer='json', app=None): + """See :meth:`@Celery.setup_security`.""" + if app is None: + from celery import current_app + app = current_app._get_current_object() + + _disable_insecure_serializers(allowed_serializers) + + conf = app.conf + if conf.CELERY_TASK_SERIALIZER != 'auth': + return + + try: + from OpenSSL import crypto # noqa + except ImportError: + raise ImproperlyConfigured(SSL_NOT_INSTALLED) + + key = key or conf.CELERY_SECURITY_KEY + cert = cert or conf.CELERY_SECURITY_CERTIFICATE + store = store or conf.CELERY_SECURITY_CERT_STORE + + if not (key and cert and store): + raise ImproperlyConfigured(SETTING_MISSING) + + with open(key) as kf: + with open(cert) as cf: + register_auth(kf.read(), cf.read(), store, digest, serializer) + registry._set_default_serializer('auth') + + +def disable_untrusted_serializers(whitelist=None): + _disable_insecure_serializers(allowed=whitelist) diff --git a/celery/security/certificate.py b/celery/security/certificate.py new file mode 100644 index 0000000..c1c520c --- /dev/null +++ b/celery/security/certificate.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +""" + celery.security.certificate + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + X.509 certificates. + +""" +from __future__ import absolute_import + +import glob +import os + +from kombu.utils.encoding import bytes_to_str + +from celery.exceptions import SecurityError +from celery.five import values + +from .utils import crypto, reraise_errors + +__all__ = ['Certificate', 'CertStore', 'FSCertStore'] + + +class Certificate(object): + """X.509 certificate.""" + + def __init__(self, cert): + assert crypto is not None + with reraise_errors('Invalid certificate: {0!r}'): + self._cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert) + + def has_expired(self): + """Check if the certificate has expired.""" + return self._cert.has_expired() + + def get_serial_number(self): + """Return the serial number in the certificate.""" + return bytes_to_str(self._cert.get_serial_number()) + + def get_issuer(self): + """Return issuer (CA) as a string""" + return ' '.join(bytes_to_str(x[1]) for x in + self._cert.get_issuer().get_components()) + + def get_id(self): + """Serial number/issuer pair uniquely identifies a certificate""" + return '{0} {1}'.format(self.get_issuer(), self.get_serial_number()) + + def verify(self, data, signature, digest): + """Verifies the signature for string containing data.""" + with reraise_errors('Bad signature: {0!r}'): + crypto.verify(self._cert, signature, data, digest) + + +class CertStore(object): + """Base class for certificate stores""" + + def __init__(self): + self._certs = {} + + def itercerts(self): + """an iterator over the certificates""" + for c in values(self._certs): + yield c + + def __getitem__(self, id): + """get certificate by id""" + try: + return self._certs[bytes_to_str(id)] + except KeyError: + raise SecurityError('Unknown certificate: {0!r}'.format(id)) + + def add_cert(self, cert): + cert_id = bytes_to_str(cert.get_id()) + if cert_id in self._certs: + raise SecurityError('Duplicate certificate: {0!r}'.format(id)) + self._certs[cert_id] = cert + + +class FSCertStore(CertStore): + """File system certificate store""" + + def __init__(self, path): + CertStore.__init__(self) + if os.path.isdir(path): + path = os.path.join(path, '*') + for p in glob.glob(path): + with open(p) as f: + cert = Certificate(f.read()) + if cert.has_expired(): + 
raise SecurityError( + 'Expired certificate: {0!r}'.format(cert.get_id())) + self.add_cert(cert) diff --git a/celery/security/key.py b/celery/security/key.py new file mode 100644 index 0000000..a5c2620 --- /dev/null +++ b/celery/security/key.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +""" + celery.security.key + ~~~~~~~~~~~~~~~~~~~ + + Private key for the security serializer. + +""" +from __future__ import absolute_import + +from kombu.utils.encoding import ensure_bytes + +from .utils import crypto, reraise_errors + +__all__ = ['PrivateKey'] + + +class PrivateKey(object): + + def __init__(self, key): + with reraise_errors('Invalid private key: {0!r}'): + self._key = crypto.load_privatekey(crypto.FILETYPE_PEM, key) + + def sign(self, data, digest): + """sign string containing data.""" + with reraise_errors('Unable to sign data: {0!r}'): + return crypto.sign(self._key, ensure_bytes(data), digest) diff --git a/celery/security/serialization.py b/celery/security/serialization.py new file mode 100644 index 0000000..f1cab29 --- /dev/null +++ b/celery/security/serialization.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +""" + celery.security.serialization + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Secure serializer. + +""" +from __future__ import absolute_import + +import base64 + +from kombu.serialization import registry, dumps, loads +from kombu.utils.encoding import bytes_to_str, str_to_bytes, ensure_bytes + +from .certificate import Certificate, FSCertStore +from .key import PrivateKey +from .utils import reraise_errors + +__all__ = ['SecureSerializer', 'register_auth'] + + +def b64encode(s): + return bytes_to_str(base64.b64encode(str_to_bytes(s))) + + +def b64decode(s): + return base64.b64decode(str_to_bytes(s)) + + +class SecureSerializer(object): + + def __init__(self, key=None, cert=None, cert_store=None, + digest='sha1', serializer='json'): + self._key = key + self._cert = cert + self._cert_store = cert_store + self._digest = digest + self._serializer = serializer + + def serialize(self, data): + """serialize data structure into string""" + assert self._key is not None + assert self._cert is not None + with reraise_errors('Unable to serialize: {0!r}', (Exception, )): + content_type, content_encoding, body = dumps( + bytes_to_str(data), serializer=self._serializer) + # What we sign is the serialized body, not the body itself. + # this way the receiver doesn't have to decode the contents + # to verify the signature (and thus avoiding potential flaws + # in the decoding step). 
+ body = ensure_bytes(body) + return self._pack(body, content_type, content_encoding, + signature=self._key.sign(body, self._digest), + signer=self._cert.get_id()) + + def deserialize(self, data): + """deserialize data structure from string""" + assert self._cert_store is not None + with reraise_errors('Unable to deserialize: {0!r}', (Exception, )): + payload = self._unpack(data) + signature, signer, body = (payload['signature'], + payload['signer'], + payload['body']) + self._cert_store[signer].verify(body, signature, self._digest) + return loads(bytes_to_str(body), payload['content_type'], + payload['content_encoding'], force=True) + + def _pack(self, body, content_type, content_encoding, signer, signature, + sep=str_to_bytes('\x00\x01')): + fields = sep.join( + ensure_bytes(s) for s in [signer, signature, content_type, + content_encoding, body] + ) + return b64encode(fields) + + def _unpack(self, payload, sep=str_to_bytes('\x00\x01')): + raw_payload = b64decode(ensure_bytes(payload)) + first_sep = raw_payload.find(sep) + + signer = raw_payload[:first_sep] + signer_cert = self._cert_store[signer] + + sig_len = signer_cert._cert.get_pubkey().bits() >> 3 + signature = raw_payload[ + first_sep + len(sep):first_sep + len(sep) + sig_len + ] + end_of_sig = first_sep + len(sep) + sig_len+len(sep) + + v = raw_payload[end_of_sig:].split(sep) + + return { + 'signer': signer, + 'signature': signature, + 'content_type': bytes_to_str(v[0]), + 'content_encoding': bytes_to_str(v[1]), + 'body': bytes_to_str(v[2]), + } + + +def register_auth(key=None, cert=None, store=None, digest='sha1', + serializer='json'): + """register security serializer""" + s = SecureSerializer(key and PrivateKey(key), + cert and Certificate(cert), + store and FSCertStore(store), + digest=digest, serializer=serializer) + registry.register('auth', s.serialize, s.deserialize, + content_type='application/data', + content_encoding='utf-8') diff --git a/celery/security/utils.py b/celery/security/utils.py new file mode 100644 index 0000000..d184d0b --- /dev/null +++ b/celery/security/utils.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +""" + celery.security.utils + ~~~~~~~~~~~~~~~~~~~~~ + + Utilities used by the message signing serializer. + +""" +from __future__ import absolute_import + +import sys + +from contextlib import contextmanager + +from celery.exceptions import SecurityError +from celery.five import reraise + +try: + from OpenSSL import crypto +except ImportError: # pragma: no cover + crypto = None # noqa + +__all__ = ['reraise_errors'] + + +@contextmanager +def reraise_errors(msg='{0!r}', errors=None): + assert crypto is not None + errors = (crypto.Error, ) if errors is None else errors + try: + yield + except errors as exc: + reraise(SecurityError, + SecurityError(msg.format(exc)), + sys.exc_info()[2]) diff --git a/celery/signals.py b/celery/signals.py new file mode 100644 index 0000000..2091830 --- /dev/null +++ b/celery/signals.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +""" + celery.signals + ~~~~~~~~~~~~~~ + + This module defines the signals (Observer pattern) sent by + both workers and clients. + + Functions can be connected to these signals, and connected + functions are called whenever a signal is called. + + See :ref:`signals` for more information. 
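A minimal example of connecting a handler to one of the signals defined below; the handler body is illustrative only:

.. code-block:: python

    from celery.signals import task_failure

    @task_failure.connect
    def log_task_failure(sender=None, task_id=None, exception=None, **kwargs):
        print('task {0} raised {1!r}'.format(task_id, exception))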
+ +""" +from __future__ import absolute_import +from .utils.dispatch import Signal + +__all__ = ['before_task_publish', 'after_task_publish', + 'task_prerun', 'task_postrun', 'task_success', + 'task_retry', 'task_failure', 'task_revoked', 'celeryd_init', + 'celeryd_after_setup', 'worker_init', 'worker_process_init', + 'worker_ready', 'worker_shutdown', 'setup_logging', + 'after_setup_logger', 'after_setup_task_logger', + 'beat_init', 'beat_embedded_init', 'eventlet_pool_started', + 'eventlet_pool_preshutdown', 'eventlet_pool_postshutdown', + 'eventlet_pool_apply'] + +before_task_publish = Signal(providing_args=[ + 'body', 'exchange', 'routing_key', 'headers', 'properties', + 'declare', 'retry_policy', +]) +after_task_publish = Signal(providing_args=[ + 'body', 'exchange', 'routing_key', +]) +#: Deprecated, use after_task_publish instead. +task_sent = Signal(providing_args=[ + 'task_id', 'task', 'args', 'kwargs', 'eta', 'taskset', +]) +task_prerun = Signal(providing_args=['task_id', 'task', 'args', 'kwargs']) +task_postrun = Signal(providing_args=[ + 'task_id', 'task', 'args', 'kwargs', 'retval', +]) +task_success = Signal(providing_args=['result']) +task_retry = Signal(providing_args=[ + 'request', 'reason', 'einfo', +]) +task_failure = Signal(providing_args=[ + 'task_id', 'exception', 'args', 'kwargs', 'traceback', 'einfo', +]) +task_revoked = Signal(providing_args=[ + 'request', 'terminated', 'signum', 'expired', +]) +celeryd_init = Signal(providing_args=['instance', 'conf', 'options']) +celeryd_after_setup = Signal(providing_args=['instance', 'conf']) +import_modules = Signal(providing_args=[]) +worker_init = Signal(providing_args=[]) +worker_process_init = Signal(providing_args=[]) +worker_process_shutdown = Signal(providing_args=[]) +worker_ready = Signal(providing_args=[]) +worker_shutdown = Signal(providing_args=[]) +setup_logging = Signal(providing_args=[ + 'loglevel', 'logfile', 'format', 'colorize', +]) +after_setup_logger = Signal(providing_args=[ + 'logger', 'loglevel', 'logfile', 'format', 'colorize', +]) +after_setup_task_logger = Signal(providing_args=[ + 'logger', 'loglevel', 'logfile', 'format', 'colorize', +]) +beat_init = Signal(providing_args=[]) +beat_embedded_init = Signal(providing_args=[]) +eventlet_pool_started = Signal(providing_args=[]) +eventlet_pool_preshutdown = Signal(providing_args=[]) +eventlet_pool_postshutdown = Signal(providing_args=[]) +eventlet_pool_apply = Signal(providing_args=['target', 'args', 'kwargs']) +user_preload_options = Signal(providing_args=['app', 'options']) diff --git a/celery/states.py b/celery/states.py new file mode 100644 index 0000000..665a57b --- /dev/null +++ b/celery/states.py @@ -0,0 +1,153 @@ +# -*- coding: utf-8 -*- +""" +celery.states +============= + +Built-in task states. + +.. _states: + +States +------ + +See :ref:`task-states`. + +.. _statesets: + +Sets +---- + +.. state:: READY_STATES + +READY_STATES +~~~~~~~~~~~~ + +Set of states meaning the task result is ready (has been executed). + +.. state:: UNREADY_STATES + +UNREADY_STATES +~~~~~~~~~~~~~~ + +Set of states meaning the task result is not ready (has not been executed). + +.. state:: EXCEPTION_STATES + +EXCEPTION_STATES +~~~~~~~~~~~~~~~~ + +Set of states meaning the task returned an exception. + +.. state:: PROPAGATE_STATES + +PROPAGATE_STATES +~~~~~~~~~~~~~~~~ + +Set of exception states that should propagate exceptions to the user. + +.. state:: ALL_STATES + +ALL_STATES +~~~~~~~~~~ + +Set of all possible states. + + +Misc. 
+----- + +""" +from __future__ import absolute_import + +__all__ = ['PENDING', 'RECEIVED', 'STARTED', 'SUCCESS', 'FAILURE', + 'REVOKED', 'RETRY', 'IGNORED', 'READY_STATES', 'UNREADY_STATES', + 'EXCEPTION_STATES', 'PROPAGATE_STATES', 'precedence', 'state'] + +#: State precedence. +#: None represents the precedence of an unknown state. +#: Lower index means higher precedence. +PRECEDENCE = ['SUCCESS', + 'FAILURE', + None, + 'REVOKED', + 'STARTED', + 'RECEIVED', + 'RETRY', + 'PENDING'] + +#: Hash lookup of PRECEDENCE to index +PRECEDENCE_LOOKUP = dict(zip(PRECEDENCE, range(0, len(PRECEDENCE)))) +NONE_PRECEDENCE = PRECEDENCE_LOOKUP[None] + + +def precedence(state): + """Get the precedence index for state. + + Lower index means higher precedence. + + """ + try: + return PRECEDENCE_LOOKUP[state] + except KeyError: + return NONE_PRECEDENCE + + +class state(str): + """State is a subclass of :class:`str`, implementing comparison + methods adhering to state precedence rules:: + + >>> from celery.states import state, PENDING, SUCCESS + + >>> state(PENDING) < state(SUCCESS) + True + + Any custom state is considered to be lower than :state:`FAILURE` and + :state:`SUCCESS`, but higher than any of the other built-in states:: + + >>> state('PROGRESS') > state(STARTED) + True + + >>> state('PROGRESS') > state('SUCCESS') + False + + """ + + def compare(self, other, fun): + return fun(precedence(self), precedence(other)) + + def __gt__(self, other): + return precedence(self) < precedence(other) + + def __ge__(self, other): + return precedence(self) <= precedence(other) + + def __lt__(self, other): + return precedence(self) > precedence(other) + + def __le__(self, other): + return precedence(self) >= precedence(other) + +#: Task state is unknown (assumed pending since you know the id). +PENDING = 'PENDING' +#: Task was received by a worker. +RECEIVED = 'RECEIVED' +#: Task was started by a worker (:setting:`CELERY_TRACK_STARTED`). +STARTED = 'STARTED' +#: Task succeeded +SUCCESS = 'SUCCESS' +#: Task failed +FAILURE = 'FAILURE' +#: Task was revoked. +REVOKED = 'REVOKED' +#: Task is waiting for retry. +RETRY = 'RETRY' +IGNORED = 'IGNORED' +REJECTED = 'REJECTED' + +READY_STATES = frozenset([SUCCESS, FAILURE, REVOKED]) +UNREADY_STATES = frozenset([PENDING, RECEIVED, STARTED, RETRY]) +EXCEPTION_STATES = frozenset([RETRY, FAILURE, REVOKED]) +PROPAGATE_STATES = frozenset([FAILURE, REVOKED]) + +ALL_STATES = frozenset([PENDING, RECEIVED, STARTED, + SUCCESS, FAILURE, RETRY, REVOKED]) diff --git a/celery/task/__init__.py b/celery/task/__init__.py new file mode 100644 index 0000000..4ab1a2f --- /dev/null +++ b/celery/task/__init__.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +""" + celery.task + ~~~~~~~~~~~ + + This is the old task module, it should not be used anymore, + import from the main 'celery' module instead. + If you're looking for the decorator implementation then that's in + ``celery.app.base.Celery.task``. + +""" +from __future__ import absolute_import + +from celery._state import current_app, current_task as current +from celery.five import LazyModule, recreate_module +from celery.local import Proxy + +__all__ = [ + 'BaseTask', 'Task', 'PeriodicTask', 'task', 'periodic_task', + 'group', 'chord', 'subtask', 'TaskSet', +] + + +STATICA_HACK = True +globals()['kcah_acitats'[::-1].upper()] = False +if STATICA_HACK: # pragma: no cover + # This is never executed, but tricks static analyzers (PyDev, PyCharm, + # pylint, etc.) into knowing the types of these symbols, and what + # they contain. 
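Illustrative comparisons using ``precedence()`` and the ``state`` subclass defined above:

.. code-block:: python

    from celery import states

    # a lower precedence index means a higher precedence
    assert states.precedence(states.SUCCESS) < states.precedence(states.PENDING)

    # custom states rank below SUCCESS/FAILURE but above the other built-ins
    assert states.state('PROGRESS') > states.state(states.STARTED)
    assert not (states.state('PROGRESS') > states.state(states.SUCCESS))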
+ from celery.canvas import group, chord, subtask + from .base import BaseTask, Task, PeriodicTask, task, periodic_task + from .sets import TaskSet + + +class module(LazyModule): + + def __call__(self, *args, **kwargs): + return self.task(*args, **kwargs) + + +old_module, new_module = recreate_module( # pragma: no cover + __name__, + by_module={ + 'celery.task.base': ['BaseTask', 'Task', 'PeriodicTask', + 'task', 'periodic_task'], + 'celery.canvas': ['group', 'chord', 'subtask'], + 'celery.task.sets': ['TaskSet'], + }, + base=module, + __package__='celery.task', + __file__=__file__, + __path__=__path__, + __doc__=__doc__, + current=current, + discard_all=Proxy(lambda: current_app.control.purge), + backend_cleanup=Proxy( + lambda: current_app.tasks['celery.backend_cleanup'] + ), +) diff --git a/celery/task/base.py b/celery/task/base.py new file mode 100644 index 0000000..aeb9f82 --- /dev/null +++ b/celery/task/base.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +""" + celery.task.base + ~~~~~~~~~~~~~~~~ + + The task implementation has been moved to :mod:`celery.app.task`. + + This contains the backward compatible Task class used in the old API, + and shouldn't be used in new applications. + +""" +from __future__ import absolute_import + +from kombu import Exchange + +from celery import current_app +from celery.app.task import Context, TaskType, Task as BaseTask # noqa +from celery.five import class_property, reclassmethod +from celery.schedules import maybe_schedule +from celery.utils.log import get_task_logger + +__all__ = ['Task', 'PeriodicTask', 'task'] + +#: list of methods that must be classmethods in the old API. +_COMPAT_CLASSMETHODS = ( + 'delay', 'apply_async', 'retry', 'apply', 'subtask_from_request', + 'AsyncResult', 'subtask', '_get_request', '_get_exec_options', +) + + +class Task(BaseTask): + """Deprecated Task base class. + + Modern applications should use :class:`celery.Task` instead. + + """ + abstract = True + __bound__ = False + __v2_compat__ = True + + # - Deprecated compat. attributes -: + + queue = None + routing_key = None + exchange = None + exchange_type = None + delivery_mode = None + mandatory = False # XXX deprecated + immediate = False # XXX deprecated + priority = None + type = 'regular' + disable_error_emails = False + accept_magic_kwargs = False + + from_config = BaseTask.from_config + ( + ('exchange_type', 'CELERY_DEFAULT_EXCHANGE_TYPE'), + ('delivery_mode', 'CELERY_DEFAULT_DELIVERY_MODE'), + ) + + # In old Celery the @task decorator didn't exist, so one would create + # classes instead and use them directly (e.g. MyTask.apply_async()). + # the use of classmethods was a hack so that it was not necessary + # to instantiate the class before using it, but it has only + # given us pain (like all magic). + for name in _COMPAT_CLASSMETHODS: + locals()[name] = reclassmethod(getattr(BaseTask, name)) + + @class_property + def request(cls): + return cls._get_request() + + @class_property + def backend(cls): + if cls._backend is None: + return cls.app.backend + return cls._backend + + @backend.setter + def backend(cls, value): # noqa + cls._backend = value + + @classmethod + def get_logger(self, **kwargs): + return get_task_logger(self.name) + + @classmethod + def establish_connection(self): + """Deprecated method used to get a broker connection. + + Should be replaced with :meth:`@Celery.connection` + instead, or by acquiring connections from the connection pool: + + .. 
code-block:: python + + # using the connection pool + with celery.pool.acquire(block=True) as conn: + ... + + # establish fresh connection + with celery.connection() as conn: + ... + """ + return self._get_app().connection() + + def get_publisher(self, connection=None, exchange=None, + exchange_type=None, **options): + """Deprecated method to get the task publisher (now called producer). + + Should be replaced with :class:`@amqp.TaskProducer`: + + .. code-block:: python + + with celery.connection() as conn: + with celery.amqp.TaskProducer(conn) as prod: + my_task.apply_async(producer=prod) + + """ + exchange = self.exchange if exchange is None else exchange + if exchange_type is None: + exchange_type = self.exchange_type + connection = connection or self.establish_connection() + return self._get_app().amqp.TaskProducer( + connection, + exchange=exchange and Exchange(exchange, exchange_type), + routing_key=self.routing_key, **options + ) + + @classmethod + def get_consumer(self, connection=None, queues=None, **kwargs): + """Deprecated method used to get consumer for the queue + this task is sent to. + + Should be replaced with :class:`@amqp.TaskConsumer` instead: + + """ + Q = self._get_app().amqp + connection = connection or self.establish_connection() + if queues is None: + queues = Q.queues[self.queue] if self.queue else Q.default_queue + return Q.TaskConsumer(connection, queues, **kwargs) + + +class PeriodicTask(Task): + """A periodic task is a task that adds itself to the + :setting:`CELERYBEAT_SCHEDULE` setting.""" + abstract = True + ignore_result = True + relative = False + options = None + compat = True + + def __init__(self): + if not hasattr(self, 'run_every'): + raise NotImplementedError( + 'Periodic tasks must have a run_every attribute') + self.run_every = maybe_schedule(self.run_every, self.relative) + super(PeriodicTask, self).__init__() + + @classmethod + def on_bound(cls, app): + app.conf.CELERYBEAT_SCHEDULE[cls.name] = { + 'task': cls.name, + 'schedule': cls.run_every, + 'args': (), + 'kwargs': {}, + 'options': cls.options or {}, + 'relative': cls.relative, + } + + +def task(*args, **kwargs): + """Deprecated decorator, please use :func:`celery.task`.""" + return current_app.task(*args, **dict({'accept_magic_kwargs': False, + 'base': Task}, **kwargs)) + + +def periodic_task(*args, **options): + """Deprecated decorator, please use :setting:`CELERYBEAT_SCHEDULE`.""" + return task(**dict({'base': PeriodicTask}, **options)) diff --git a/celery/task/http.py b/celery/task/http.py new file mode 100644 index 0000000..e170ec3 --- /dev/null +++ b/celery/task/http.py @@ -0,0 +1,220 @@ +# -*- coding: utf-8 -*- +""" + celery.task.http + ~~~~~~~~~~~~~~~~ + + Webhook task implementation. 
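A hedged sketch of the deprecated periodic-task API defined above: a ``PeriodicTask`` subclass must define ``run_every`` and, once bound to an app, registers itself in :setting:`CELERYBEAT_SCHEDULE` through ``on_bound()``. The task name below is purely illustrative:

.. code-block:: python

    from datetime import timedelta

    from celery.task import PeriodicTask   # deprecated compat API

    class RefreshFeeds(PeriodicTask):       # hypothetical example task
        run_every = timedelta(minutes=30)

        def run(self):
            return 'feeds refreshed'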
+ +""" +from __future__ import absolute_import + +import anyjson +import sys + +try: + from urllib.parse import parse_qsl, urlencode, urlparse # Py3 +except ImportError: # pragma: no cover + from urllib import urlencode # noqa + from urlparse import urlparse, parse_qsl # noqa + +from celery import shared_task, __version__ as celery_version +from celery.five import items, reraise +from celery.utils.log import get_task_logger + +__all__ = ['InvalidResponseError', 'RemoteExecuteError', 'UnknownStatusError', + 'HttpDispatch', 'dispatch', 'URL'] + +GET_METHODS = frozenset(['GET', 'HEAD']) +logger = get_task_logger(__name__) + + +if sys.version_info[0] == 3: # pragma: no cover + + from urllib.request import Request, urlopen + + def utf8dict(tup): + if not isinstance(tup, dict): + return dict(tup) + return tup + +else: + + from urllib2 import Request, urlopen # noqa + + def utf8dict(tup): # noqa + """With a dict's items() tuple return a new dict with any utf-8 + keys/values encoded.""" + return dict( + (k.encode('utf-8'), + v.encode('utf-8') if isinstance(v, unicode) else v) # noqa + for k, v in tup) + + +class InvalidResponseError(Exception): + """The remote server gave an invalid response.""" + + +class RemoteExecuteError(Exception): + """The remote task gave a custom error.""" + + +class UnknownStatusError(InvalidResponseError): + """The remote server gave an unknown status.""" + + +def extract_response(raw_response, loads=anyjson.loads): + """Extract the response text from a raw JSON response.""" + if not raw_response: + raise InvalidResponseError('Empty response') + try: + payload = loads(raw_response) + except ValueError as exc: + reraise(InvalidResponseError, InvalidResponseError( + str(exc)), sys.exc_info()[2]) + + status = payload['status'] + if status == 'success': + return payload['retval'] + elif status == 'failure': + raise RemoteExecuteError(payload.get('reason')) + else: + raise UnknownStatusError(str(status)) + + +class MutableURL(object): + """Object wrapping a Uniform Resource Locator. + + Supports editing the query parameter list. + You can convert the object back to a string, the query will be + properly urlencoded. + + Examples + + >>> url = URL('http://www.google.com:6580/foo/bar?x=3&y=4#foo') + >>> url.query + {'x': '3', 'y': '4'} + >>> str(url) + 'http://www.google.com:6580/foo/bar?y=4&x=3#foo' + >>> url.query['x'] = 10 + >>> url.query.update({'George': 'Costanza'}) + >>> str(url) + 'http://www.google.com:6580/foo/bar?y=4&x=10&George=Costanza#foo' + + """ + def __init__(self, url): + self.parts = urlparse(url) + self.query = dict(parse_qsl(self.parts[4])) + + def __str__(self): + scheme, netloc, path, params, query, fragment = self.parts + query = urlencode(utf8dict(items(self.query))) + components = [scheme + '://', netloc, path or '/', + ';{0}'.format(params) if params else '', + '?{0}'.format(query) if query else '', + '#{0}'.format(fragment) if fragment else ''] + return ''.join(c for c in components if c) + + def __repr__(self): + return '<{0}: {1}>'.format(type(self).__name__, self) + + +class HttpDispatch(object): + """Make task HTTP request and collect the task result. + + :param url: The URL to request. + :param method: HTTP method used. Currently supported methods are `GET` + and `POST`. + :param task_kwargs: Task keyword arguments. + :param logger: Logger used for user/system feedback. 
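The webhook reply format implied by ``extract_response()`` above is a JSON document with a ``status`` field and either ``retval`` (on success) or ``reason`` (on failure):

.. code-block:: python

    from celery.task.http import RemoteExecuteError, extract_response

    assert extract_response('{"status": "success", "retval": 100}') == 100

    try:
        extract_response('{"status": "failure", "reason": "unknown user"}')
    except RemoteExecuteError as exc:
        print('remote task failed: %s' % (exc,))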
+ + """ + user_agent = 'celery/{version}'.format(version=celery_version) + timeout = 5 + + def __init__(self, url, method, task_kwargs, **kwargs): + self.url = url + self.method = method + self.task_kwargs = task_kwargs + self.logger = kwargs.get('logger') or logger + + def make_request(self, url, method, params): + """Perform HTTP request and return the response.""" + request = Request(url, params) + for key, val in items(self.http_headers): + request.add_header(key, val) + response = urlopen(request) # user catches errors. + return response.read() + + def dispatch(self): + """Dispatch callback and return result.""" + url = MutableURL(self.url) + params = None + if self.method in GET_METHODS: + url.query.update(self.task_kwargs) + else: + params = urlencode(utf8dict(items(self.task_kwargs))) + raw_response = self.make_request(str(url), self.method, params) + return extract_response(raw_response) + + @property + def http_headers(self): + headers = {'User-Agent': self.user_agent} + return headers + + +@shared_task(name='celery.http_dispatch', bind=True, + url=None, method=None, accept_magic_kwargs=False) +def dispatch(self, url=None, method='GET', **kwargs): + """Task dispatching to an URL. + + :keyword url: The URL location of the HTTP callback task. + :keyword method: Method to use when dispatching the callback. Usually + `GET` or `POST`. + :keyword \*\*kwargs: Keyword arguments to pass on to the HTTP callback. + + .. attribute:: url + + If this is set, this is used as the default URL for requests. + Default is to require the user of the task to supply the url as an + argument, as this attribute is intended for subclasses. + + .. attribute:: method + + If this is set, this is the default method used for requests. + Default is to require the user of the task to supply the method as an + argument, as this attribute is intended for subclasses. + + """ + return HttpDispatch( + url or self.url, method or self.method, kwargs, + ).dispatch() + + +class URL(MutableURL): + """HTTP Callback URL + + Supports requesting an URL asynchronously. + + :param url: URL to request. + :keyword dispatcher: Class used to dispatch the request. + By default this is :func:`dispatch`. + + """ + dispatcher = None + + def __init__(self, url, dispatcher=None, app=None): + super(URL, self).__init__(url) + self.app = app + self.dispatcher = dispatcher or self.dispatcher + if self.dispatcher is None: + # Get default dispatcher + self.dispatcher = ( + self.app.tasks['celery.http_dispatch'] if self.app + else dispatch + ) + + def get_async(self, **kwargs): + return self.dispatcher.delay(str(self), 'GET', **kwargs) + + def post_async(self, **kwargs): + return self.dispatcher.delay(str(self), 'POST', **kwargs) diff --git a/celery/task/sets.py b/celery/task/sets.py new file mode 100644 index 0000000..e277b79 --- /dev/null +++ b/celery/task/sets.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +""" + celery.task.sets + ~~~~~~~~~~~~~~~~ + + Old ``group`` implementation, this module should + not be used anymore use :func:`celery.group` instead. 
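Example use of the webhook helpers above (the URL is illustrative only); ``URL.get_async()`` simply delays the ``celery.http_dispatch`` task shown above with the ``GET`` method:

.. code-block:: python

    from celery.task.http import URL, dispatch

    # Both calls enqueue celery.http_dispatch with the same arguments.
    result = URL('http://example.com/multiply').get_async(x=10, y=10)
    result = dispatch.delay('http://example.com/multiply', 'GET', x=10, y=10)

    print(result.get())   # the decoded ``retval`` from the JSON reply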
+ +""" +from __future__ import absolute_import + +from celery._state import get_current_worker_task +from celery.app import app_or_default +from celery.canvas import maybe_signature # noqa +from celery.utils import uuid, warn_deprecated + +from celery.canvas import subtask # noqa + +warn_deprecated( + 'celery.task.sets and TaskSet', removal='4.0', + alternative="""\ +Please use "group" instead (see the Canvas section in the userguide)\ +""") + + +class TaskSet(list): + """A task containing several subtasks, making it possible + to track how many, or when all of the tasks have been completed. + + :param tasks: A list of :class:`subtask` instances. + + Example:: + + >>> from myproj.tasks import refresh_feed + + >>> urls = ('http://cnn.com/rss', 'http://bbc.co.uk/rss') + >>> s = TaskSet(refresh_feed.s(url) for url in urls) + >>> taskset_result = s.apply_async() + >>> list_of_return_values = taskset_result.join() # *expensive* + + """ + app = None + + def __init__(self, tasks=None, app=None, Publisher=None): + self.app = app_or_default(app or self.app) + super(TaskSet, self).__init__( + maybe_signature(t, app=self.app) for t in tasks or [] + ) + self.Publisher = Publisher or self.app.amqp.TaskProducer + self.total = len(self) # XXX compat + + def apply_async(self, connection=None, publisher=None, taskset_id=None): + """Apply TaskSet.""" + app = self.app + + if app.conf.CELERY_ALWAYS_EAGER: + return self.apply(taskset_id=taskset_id) + + with app.connection_or_acquire(connection) as conn: + setid = taskset_id or uuid() + pub = publisher or self.Publisher(conn) + results = self._async_results(setid, pub) + + result = app.TaskSetResult(setid, results) + parent = get_current_worker_task() + if parent: + parent.add_trail(result) + return result + + def _async_results(self, taskset_id, publisher): + return [task.apply_async(taskset_id=taskset_id, publisher=publisher) + for task in self] + + def apply(self, taskset_id=None): + """Applies the TaskSet locally by blocking until all tasks return.""" + setid = taskset_id or uuid() + return self.app.TaskSetResult(setid, self._sync_results(setid)) + + def _sync_results(self, taskset_id): + return [task.apply(taskset_id=taskset_id) for task in self] + + @property + def tasks(self): + return self + + @tasks.setter # noqa + def tasks(self, tasks): + self[:] = tasks diff --git a/celery/task/trace.py b/celery/task/trace.py new file mode 100644 index 0000000..5e5f5a8 --- /dev/null +++ b/celery/task/trace.py @@ -0,0 +1,12 @@ +"""This module has moved to celery.app.trace.""" +from __future__ import absolute_import + +import sys + +from celery.utils import warn_deprecated + +warn_deprecated('celery.task.trace', removal='3.2', + alternative='Please use celery.app.trace instead.') + +from celery.app import trace +sys.modules[__name__] = trace diff --git a/celery/tests/__init__.py b/celery/tests/__init__.py new file mode 100644 index 0000000..9667872 --- /dev/null +++ b/celery/tests/__init__.py @@ -0,0 +1,87 @@ +from __future__ import absolute_import + +import logging +import os +import sys +import warnings + +from importlib import import_module + +try: + WindowsError = WindowsError # noqa +except NameError: + + class WindowsError(Exception): + pass + + +def setup(): + os.environ.update( + # warn if config module not found + C_WNOCONF='yes', + KOMBU_DISABLE_LIMIT_PROTECTION='yes', + ) + + if os.environ.get('COVER_ALL_MODULES') or '--with-coverage' in sys.argv: + from warnings import catch_warnings + with catch_warnings(record=True): + import_all_modules() + 
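A migration sketch matching the deprecation warning above; ``refresh_feed`` is the hypothetical task used in the ``TaskSet`` docstring:

.. code-block:: python

    from celery import group

    from myproj.tasks import refresh_feed   # hypothetical module, as above

    urls = ('http://cnn.com/rss', 'http://bbc.co.uk/rss')
    result = group(refresh_feed.s(url) for url in urls).apply_async()
    values = result.join()   # blocks; same caveat as TaskSetResult.join()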
warnings.resetwarnings() + from celery.tests.case import Trap + from celery._state import set_default_app + set_default_app(Trap()) + + +def teardown(): + # Don't want SUBDEBUG log messages at finalization. + try: + from multiprocessing.util import get_logger + except ImportError: + pass + else: + get_logger().setLevel(logging.WARNING) + + # Make sure test database is removed. + import os + if os.path.exists('test.db'): + try: + os.remove('test.db') + except WindowsError: + pass + + # Make sure there are no remaining threads at shutdown. + import threading + remaining_threads = [thread for thread in threading.enumerate() + if thread.getName() != 'MainThread'] + if remaining_threads: + sys.stderr.write( + '\n\n**WARNING**: Remaining threads at teardown: %r...\n' % ( + remaining_threads)) + + +def find_distribution_modules(name=__name__, file=__file__): + current_dist_depth = len(name.split('.')) - 1 + current_dist = os.path.join(os.path.dirname(file), + *([os.pardir] * current_dist_depth)) + abs = os.path.abspath(current_dist) + dist_name = os.path.basename(abs) + + for dirpath, dirnames, filenames in os.walk(abs): + package = (dist_name + dirpath[len(abs):]).replace('/', '.') + if '__init__.py' in filenames: + yield package + for filename in filenames: + if filename.endswith('.py') and filename != '__init__.py': + yield '.'.join([package, filename])[:-3] + + +def import_all_modules(name=__name__, file=__file__, + skip=('celery.decorators', + 'celery.contrib.batches', + 'celery.task')): + for module in find_distribution_modules(name, file): + if not module.startswith(skip): + try: + import_module(module) + except ImportError: + pass diff --git a/celery/tests/app/__init__.py b/celery/tests/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/tests/app/test_amqp.py b/celery/tests/app/test_amqp.py new file mode 100644 index 0000000..efb398a --- /dev/null +++ b/celery/tests/app/test_amqp.py @@ -0,0 +1,228 @@ +from __future__ import absolute_import + +import datetime + +import pytz + +from kombu import Exchange, Queue + +from celery.app.amqp import Queues, TaskPublisher +from celery.five import keys +from celery.tests.case import AppCase, Mock + + +class test_TaskProducer(AppCase): + + def test__exit__(self): + publisher = self.app.amqp.TaskProducer(self.app.connection()) + publisher.release = Mock() + with publisher: + pass + publisher.release.assert_called_with() + + def test_declare(self): + publisher = self.app.amqp.TaskProducer(self.app.connection()) + publisher.exchange.name = 'foo' + publisher.declare() + publisher.exchange.name = None + publisher.declare() + + def test_retry_policy(self): + prod = self.app.amqp.TaskProducer(Mock()) + prod.channel.connection.client.declared_entities = set() + prod.publish_task('tasks.add', (2, 2), {}, + retry_policy={'frobulate': 32.4}) + + def test_publish_no_retry(self): + prod = self.app.amqp.TaskProducer(Mock()) + prod.channel.connection.client.declared_entities = set() + prod.publish_task('tasks.add', (2, 2), {}, retry=False, chord=123) + self.assertFalse(prod.connection.ensure.call_count) + + def test_publish_custom_queue(self): + prod = self.app.amqp.TaskProducer(Mock()) + self.app.amqp.queues['some_queue'] = Queue( + 'xxx', Exchange('yyy'), 'zzz', + ) + prod.channel.connection.client.declared_entities = set() + prod.publish = Mock() + prod.publish_task('tasks.add', (8, 8), {}, retry=False, + queue='some_queue') + self.assertEqual(prod.publish.call_args[1]['exchange'], 'yyy') + 
self.assertEqual(prod.publish.call_args[1]['routing_key'], 'zzz') + + def test_publish_with_countdown(self): + prod = self.app.amqp.TaskProducer(Mock()) + prod.channel.connection.client.declared_entities = set() + prod.publish = Mock() + now = datetime.datetime(2013, 11, 26, 16, 48, 46) + prod.publish_task('tasks.add', (1, 1), {}, retry=False, + countdown=10, now=now) + self.assertEqual( + prod.publish.call_args[0][0]['eta'], + '2013-11-26T16:48:56+00:00', + ) + + def test_publish_with_countdown_and_timezone(self): + # use timezone with fixed offset to be sure it won't be changed + self.app.conf.CELERY_TIMEZONE = pytz.FixedOffset(120) + prod = self.app.amqp.TaskProducer(Mock()) + prod.channel.connection.client.declared_entities = set() + prod.publish = Mock() + now = datetime.datetime(2013, 11, 26, 16, 48, 46) + prod.publish_task('tasks.add', (2, 2), {}, retry=False, + countdown=20, now=now) + self.assertEqual( + prod.publish.call_args[0][0]['eta'], + '2013-11-26T18:49:06+02:00', + ) + + def test_event_dispatcher(self): + prod = self.app.amqp.TaskProducer(Mock()) + self.assertTrue(prod.event_dispatcher) + self.assertFalse(prod.event_dispatcher.enabled) + + +class test_TaskConsumer(AppCase): + + def test_accept_content(self): + with self.app.pool.acquire(block=True) as conn: + self.app.conf.CELERY_ACCEPT_CONTENT = ['application/json'] + self.assertEqual( + self.app.amqp.TaskConsumer(conn).accept, + set(['application/json']) + ) + self.assertEqual( + self.app.amqp.TaskConsumer(conn, accept=['json']).accept, + set(['application/json']), + ) + + +class test_compat_TaskPublisher(AppCase): + + def test_compat_exchange_is_string(self): + producer = TaskPublisher(exchange='foo', app=self.app) + self.assertIsInstance(producer.exchange, Exchange) + self.assertEqual(producer.exchange.name, 'foo') + self.assertEqual(producer.exchange.type, 'direct') + producer = TaskPublisher(exchange='foo', exchange_type='topic', + app=self.app) + self.assertEqual(producer.exchange.type, 'topic') + + def test_compat_exchange_is_Exchange(self): + producer = TaskPublisher(exchange=Exchange('foo'), app=self.app) + self.assertEqual(producer.exchange.name, 'foo') + + +class test_PublisherPool(AppCase): + + def test_setup_nolimit(self): + self.app.conf.BROKER_POOL_LIMIT = None + try: + delattr(self.app, '_pool') + except AttributeError: + pass + self.app.amqp._producer_pool = None + pool = self.app.amqp.producer_pool + self.assertEqual(pool.limit, self.app.pool.limit) + self.assertFalse(pool._resource.queue) + + r1 = pool.acquire() + r2 = pool.acquire() + r1.release() + r2.release() + r1 = pool.acquire() + r2 = pool.acquire() + + def test_setup(self): + self.app.conf.BROKER_POOL_LIMIT = 2 + try: + delattr(self.app, '_pool') + except AttributeError: + pass + self.app.amqp._producer_pool = None + pool = self.app.amqp.producer_pool + self.assertEqual(pool.limit, self.app.pool.limit) + self.assertTrue(pool._resource.queue) + + p1 = r1 = pool.acquire() + p2 = r2 = pool.acquire() + r1.release() + r2.release() + r1 = pool.acquire() + r2 = pool.acquire() + self.assertIs(p2, r1) + self.assertIs(p1, r2) + r1.release() + r2.release() + + +class test_Queues(AppCase): + + def test_queues_format(self): + self.app.amqp.queues._consume_from = {} + self.assertEqual(self.app.amqp.queues.format(), '') + + def test_with_defaults(self): + self.assertEqual(Queues(None), {}) + + def test_add(self): + q = Queues() + q.add('foo', exchange='ex', routing_key='rk') + self.assertIn('foo', q) + self.assertIsInstance(q['foo'], Queue) + 
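What the countdown tests above assert, restated: ``publish_task()`` turns a relative ``countdown`` into an absolute ISO-8601 ``eta`` in the configured timezone (UTC by default):

.. code-block:: python

    from datetime import datetime, timedelta

    import pytz

    now = datetime(2013, 11, 26, 16, 48, 46)
    eta = pytz.utc.localize(now + timedelta(seconds=10))
    assert eta.isoformat() == '2013-11-26T16:48:56+00:00'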
self.assertEqual(q['foo'].routing_key, 'rk') + + def test_with_ha_policy(self): + qn = Queues(ha_policy=None, create_missing=False) + qn.add('xyz') + self.assertIsNone(qn['xyz'].queue_arguments) + + qn.add('xyx', queue_arguments={'x-foo': 'bar'}) + self.assertEqual(qn['xyx'].queue_arguments, {'x-foo': 'bar'}) + + q = Queues(ha_policy='all', create_missing=False) + q.add(Queue('foo')) + self.assertEqual(q['foo'].queue_arguments, {'x-ha-policy': 'all'}) + + qq = Queue('xyx2', queue_arguments={'x-foo': 'bari'}) + q.add(qq) + self.assertEqual(q['xyx2'].queue_arguments, { + 'x-ha-policy': 'all', + 'x-foo': 'bari', + }) + + q2 = Queues(ha_policy=['A', 'B', 'C'], create_missing=False) + q2.add(Queue('foo')) + self.assertEqual(q2['foo'].queue_arguments, { + 'x-ha-policy': 'nodes', + 'x-ha-policy-params': ['A', 'B', 'C'], + }) + + def test_select_add(self): + q = Queues() + q.select(['foo', 'bar']) + q.select_add('baz') + self.assertItemsEqual(keys(q._consume_from), ['foo', 'bar', 'baz']) + + def test_deselect(self): + q = Queues() + q.select(['foo', 'bar']) + q.deselect('bar') + self.assertItemsEqual(keys(q._consume_from), ['foo']) + + def test_with_ha_policy_compat(self): + q = Queues(ha_policy='all') + q.add('bar') + self.assertEqual(q['bar'].queue_arguments, {'x-ha-policy': 'all'}) + + def test_add_default_exchange(self): + ex = Exchange('fff', 'fanout') + q = Queues(default_exchange=ex) + q.add(Queue('foo')) + self.assertEqual(q['foo'].exchange, ex) + + def test_alias(self): + q = Queues() + q.add(Queue('foo', alias='barfoo')) + self.assertIs(q['barfoo'], q['foo']) diff --git a/celery/tests/app/test_annotations.py b/celery/tests/app/test_annotations.py new file mode 100644 index 0000000..559f5cb --- /dev/null +++ b/celery/tests/app/test_annotations.py @@ -0,0 +1,56 @@ +from __future__ import absolute_import + +from celery.app.annotations import MapAnnotation, prepare +from celery.utils.imports import qualname + +from celery.tests.case import AppCase + + +class MyAnnotation(object): + foo = 65 + + +class AnnotationCase(AppCase): + + def setup(self): + @self.app.task(shared=False) + def add(x, y): + return x + y + self.add = add + + @self.app.task(shared=False) + def mul(x, y): + return x * y + self.mul = mul + + +class test_MapAnnotation(AnnotationCase): + + def test_annotate(self): + x = MapAnnotation({self.add.name: {'foo': 1}}) + self.assertDictEqual(x.annotate(self.add), {'foo': 1}) + self.assertIsNone(x.annotate(self.mul)) + + def test_annotate_any(self): + x = MapAnnotation({'*': {'foo': 2}}) + self.assertDictEqual(x.annotate_any(), {'foo': 2}) + + x = MapAnnotation() + self.assertIsNone(x.annotate_any()) + + +class test_prepare(AnnotationCase): + + def test_dict_to_MapAnnotation(self): + x = prepare({self.add.name: {'foo': 3}}) + self.assertIsInstance(x[0], MapAnnotation) + + def test_returns_list(self): + self.assertListEqual(prepare(1), [1]) + self.assertListEqual(prepare([1]), [1]) + self.assertListEqual(prepare((1, )), [1]) + self.assertEqual(prepare(None), ()) + + def test_evalutes_qualnames(self): + self.assertEqual(prepare(qualname(MyAnnotation))[0]().foo, 65) + self.assertEqual(prepare([qualname(MyAnnotation)])[0]().foo, 65) diff --git a/celery/tests/app/test_app.py b/celery/tests/app/test_app.py new file mode 100644 index 0000000..80dd296 --- /dev/null +++ b/celery/tests/app/test_app.py @@ -0,0 +1,720 @@ +from __future__ import absolute_import + +import gc +import os +import itertools + +from copy import deepcopy +from pickle import loads, dumps + +from amqp import promise +from 
kombu import Exchange + +from celery import shared_task, current_app +from celery import app as _app +from celery import _state +from celery.app import base as _appbase +from celery.app import defaults +from celery.exceptions import ImproperlyConfigured +from celery.five import items +from celery.loaders.base import BaseLoader +from celery.platforms import pyimplementation +from celery.utils.serialization import pickle + +from celery.tests.case import ( + CELERY_TEST_CONFIG, + AppCase, + Mock, + depends_on_current_app, + mask_modules, + patch, + platform_pyimp, + sys_platform, + pypy_version, + with_environ, +) +from celery.utils import uuid +from celery.utils.mail import ErrorMail + +THIS_IS_A_KEY = 'this is a value' + + +class ObjectConfig(object): + FOO = 1 + BAR = 2 + +object_config = ObjectConfig() +dict_config = dict(FOO=10, BAR=20) + + +class ObjectConfig2(object): + LEAVE_FOR_WORK = True + MOMENT_TO_STOP = True + CALL_ME_BACK = 123456789 + WANT_ME_TO = False + UNDERSTAND_ME = True + + +class Object(object): + + def __init__(self, **kwargs): + for key, value in items(kwargs): + setattr(self, key, value) + + +def _get_test_config(): + return deepcopy(CELERY_TEST_CONFIG) +test_config = _get_test_config() + + +class test_module(AppCase): + + def test_default_app(self): + self.assertEqual(_app.default_app, _state.default_app) + + def test_bugreport(self): + self.assertTrue(_app.bugreport(app=self.app)) + + +class test_App(AppCase): + + def setup(self): + self.app.add_defaults(test_config) + + def test_task_autofinalize_disabled(self): + with self.Celery('xyzibari', autofinalize=False) as app: + @app.task + def ttafd(): + return 42 + + with self.assertRaises(RuntimeError): + ttafd() + + with self.Celery('xyzibari', autofinalize=False) as app: + @app.task + def ttafd2(): + return 42 + + app.finalize() + self.assertEqual(ttafd2(), 42) + + def test_registry_autofinalize_disabled(self): + with self.Celery('xyzibari', autofinalize=False) as app: + with self.assertRaises(RuntimeError): + app.tasks['celery.chain'] + app.finalize() + self.assertTrue(app.tasks['celery.chain']) + + def test_task(self): + with self.Celery('foozibari') as app: + + def fun(): + pass + + fun.__module__ = '__main__' + task = app.task(fun) + self.assertEqual(task.name, app.main + '.fun') + + def test_with_config_source(self): + with self.Celery(config_source=ObjectConfig) as app: + self.assertEqual(app.conf.FOO, 1) + self.assertEqual(app.conf.BAR, 2) + + @depends_on_current_app + def test_task_windows_execv(self): + prev, _appbase._EXECV = _appbase._EXECV, True + try: + + @self.app.task(shared=False) + def foo(): + pass + + self.assertTrue(foo._get_current_object()) # is proxy + + finally: + _appbase._EXECV = prev + assert not _appbase._EXECV + + def test_task_takes_no_args(self): + with self.assertRaises(TypeError): + @self.app.task(1) + def foo(): + pass + + def test_add_defaults(self): + self.assertFalse(self.app.configured) + _conf = {'FOO': 300} + conf = lambda: _conf + self.app.add_defaults(conf) + self.assertIn(conf, self.app._pending_defaults) + self.assertFalse(self.app.configured) + self.assertEqual(self.app.conf.FOO, 300) + self.assertTrue(self.app.configured) + self.assertFalse(self.app._pending_defaults) + + # defaults not pickled + appr = loads(dumps(self.app)) + with self.assertRaises(AttributeError): + appr.conf.FOO + + # add more defaults after configured + conf2 = {'FOO': 'BAR'} + self.app.add_defaults(conf2) + self.assertEqual(self.app.conf.FOO, 'BAR') + + self.assertIn(_conf, self.app.conf.defaults) 
+ self.assertIn(conf2, self.app.conf.defaults) + + def test_connection_or_acquire(self): + with self.app.connection_or_acquire(block=True): + self.assertTrue(self.app.pool._dirty) + + with self.app.connection_or_acquire(pool=False): + self.assertFalse(self.app.pool._dirty) + + def test_maybe_close_pool(self): + cpool = self.app._pool = Mock() + amqp = self.app.__dict__['amqp'] = Mock() + ppool = amqp._producer_pool + self.app._maybe_close_pool() + cpool.force_close_all.assert_called_with() + ppool.force_close_all.assert_called_with() + self.assertIsNone(self.app._pool) + self.assertIsNone(self.app.__dict__['amqp']._producer_pool) + + self.app._pool = Mock() + self.app._maybe_close_pool() + self.app._maybe_close_pool() + + def test_using_v1_reduce(self): + self.app._using_v1_reduce = True + self.assertTrue(loads(dumps(self.app))) + + def test_autodiscover_tasks_force(self): + self.app.loader.autodiscover_tasks = Mock() + self.app.autodiscover_tasks(['proj.A', 'proj.B'], force=True) + self.app.loader.autodiscover_tasks.assert_called_with( + ['proj.A', 'proj.B'], 'tasks', + ) + self.app.loader.autodiscover_tasks = Mock() + self.app.autodiscover_tasks( + lambda: ['proj.A', 'proj.B'], + related_name='george', + force=True, + ) + self.app.loader.autodiscover_tasks.assert_called_with( + ['proj.A', 'proj.B'], 'george', + ) + + def test_autodiscover_tasks_lazy(self): + with patch('celery.signals.import_modules') as import_modules: + packages = lambda: [1, 2, 3] + self.app.autodiscover_tasks(packages) + self.assertTrue(import_modules.connect.called) + prom = import_modules.connect.call_args[0][0] + self.assertIsInstance(prom, promise) + self.assertEqual(prom.fun, self.app._autodiscover_tasks) + self.assertEqual(prom.args[0](), [1, 2, 3]) + + @with_environ('CELERY_BROKER_URL', '') + def test_with_broker(self): + with self.Celery(broker='foo://baribaz') as app: + self.assertEqual(app.conf.BROKER_URL, 'foo://baribaz') + + def test_repr(self): + self.assertTrue(repr(self.app)) + + def test_custom_task_registry(self): + with self.Celery(tasks=self.app.tasks) as app2: + self.assertIs(app2.tasks, self.app.tasks) + + def test_include_argument(self): + with self.Celery(include=('foo', 'bar.foo')) as app: + self.assertEqual(app.conf.CELERY_IMPORTS, ('foo', 'bar.foo')) + + def test_set_as_current(self): + current = _state._tls.current_app + try: + app = self.Celery(set_as_current=True) + self.assertIs(_state._tls.current_app, app) + finally: + _state._tls.current_app = current + + def test_current_task(self): + @self.app.task + def foo(shared=False): + pass + + _state._task_stack.push(foo) + try: + self.assertEqual(self.app.current_task.name, foo.name) + finally: + _state._task_stack.pop() + + def test_task_not_shared(self): + with patch('celery.app.base.connect_on_app_finalize') as sh: + @self.app.task(shared=False) + def foo(): + pass + self.assertFalse(sh.called) + + def test_task_compat_with_filter(self): + with self.Celery(accept_magic_kwargs=True) as app: + check = Mock() + + def filter(task): + check(task) + return task + + @app.task(filter=filter, shared=False) + def foo(): + pass + check.assert_called_with(foo) + + def test_task_with_filter(self): + with self.Celery(accept_magic_kwargs=False) as app: + check = Mock() + + def filter(task): + check(task) + return task + + assert not _appbase._EXECV + + @app.task(filter=filter, shared=False) + def foo(): + pass + check.assert_called_with(foo) + + def test_task_sets_main_name_MP_MAIN_FILE(self): + from celery import utils as _utils + _utils.MP_MAIN_FILE = 
__file__ + try: + with self.Celery('xuzzy') as app: + + @app.task + def foo(): + pass + + self.assertEqual(foo.name, 'xuzzy.foo') + finally: + _utils.MP_MAIN_FILE = None + + def test_annotate_decorator(self): + from celery.app.task import Task + + class adX(Task): + abstract = True + + def run(self, y, z, x): + return y, z, x + + check = Mock() + + def deco(fun): + + def _inner(*args, **kwargs): + check(*args, **kwargs) + return fun(*args, **kwargs) + return _inner + + self.app.conf.CELERY_ANNOTATIONS = { + adX.name: {'@__call__': deco} + } + adX.bind(self.app) + self.assertIs(adX.app, self.app) + + i = adX() + i(2, 4, x=3) + check.assert_called_with(i, 2, 4, x=3) + + i.annotate() + i.annotate() + + def test_apply_async_has__self__(self): + @self.app.task(__self__='hello', shared=False) + def aawsX(): + pass + + with patch('celery.app.amqp.TaskProducer.publish_task') as dt: + aawsX.apply_async((4, 5)) + args = dt.call_args[0][1] + self.assertEqual(args, ('hello', 4, 5)) + + def test_apply_async_adds_children(self): + from celery._state import _task_stack + + @self.app.task(shared=False) + def a3cX1(self): + pass + + @self.app.task(shared=False) + def a3cX2(self): + pass + + _task_stack.push(a3cX1) + try: + a3cX1.push_request(called_directly=False) + try: + res = a3cX2.apply_async(add_to_parent=True) + self.assertIn(res, a3cX1.request.children) + finally: + a3cX1.pop_request() + finally: + _task_stack.pop() + + def test_pickle_app(self): + changes = dict(THE_FOO_BAR='bars', + THE_MII_MAR='jars') + self.app.conf.update(changes) + saved = pickle.dumps(self.app) + self.assertLess(len(saved), 2048) + restored = pickle.loads(saved) + self.assertDictContainsSubset(changes, restored.conf) + + def test_worker_main(self): + from celery.bin import worker as worker_bin + + class worker(worker_bin.worker): + + def execute_from_commandline(self, argv): + return argv + + prev, worker_bin.worker = worker_bin.worker, worker + try: + ret = self.app.worker_main(argv=['--version']) + self.assertListEqual(ret, ['--version']) + finally: + worker_bin.worker = prev + + def test_config_from_envvar(self): + os.environ['CELERYTEST_CONFIG_OBJECT'] = 'celery.tests.app.test_app' + self.app.config_from_envvar('CELERYTEST_CONFIG_OBJECT') + self.assertEqual(self.app.conf.THIS_IS_A_KEY, 'this is a value') + + def assert_config2(self): + self.assertTrue(self.app.conf.LEAVE_FOR_WORK) + self.assertTrue(self.app.conf.MOMENT_TO_STOP) + self.assertEqual(self.app.conf.CALL_ME_BACK, 123456789) + self.assertFalse(self.app.conf.WANT_ME_TO) + self.assertTrue(self.app.conf.UNDERSTAND_ME) + + def test_config_from_object__lazy(self): + conf = ObjectConfig2() + self.app.config_from_object(conf) + self.assertFalse(self.app.loader._conf) + self.assertIs(self.app._config_source, conf) + + self.assert_config2() + + def test_config_from_object__force(self): + self.app.config_from_object(ObjectConfig2(), force=True) + self.assertTrue(self.app.loader._conf) + + self.assert_config2() + + def test_config_from_cmdline(self): + cmdline = ['.always_eager=no', + '.result_backend=/dev/null', + 'celeryd.prefetch_multiplier=368', + '.foobarstring=(string)300', + '.foobarint=(int)300', + '.result_engine_options=(dict){"foo": "bar"}'] + self.app.config_from_cmdline(cmdline, namespace='celery') + self.assertFalse(self.app.conf.CELERY_ALWAYS_EAGER) + self.assertEqual(self.app.conf.CELERY_RESULT_BACKEND, '/dev/null') + self.assertEqual(self.app.conf.CELERYD_PREFETCH_MULTIPLIER, 368) + self.assertEqual(self.app.conf.CELERY_FOOBARSTRING, '300') + 
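A hedged sketch of the configuration APIs exercised above (module, class and setting values are illustrative): settings may come from an object, from a module named in an environment variable, or from command-line overrides:

.. code-block:: python

    import os

    from celery import Celery

    app = Celery('proj')

    class Config(object):                # hypothetical config object
        CELERY_ALWAYS_EAGER = True
        BROKER_URL = 'memory://'

    app.config_from_object(Config)

    os.environ['CELERY_CONFIG_MODULE'] = 'proj.celeryconfig'   # hypothetical
    app.config_from_envvar('CELERY_CONFIG_MODULE', silent=True)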
self.assertEqual(self.app.conf.CELERY_FOOBARINT, 300) + self.assertDictEqual(self.app.conf.CELERY_RESULT_ENGINE_OPTIONS, + {'foo': 'bar'}) + + def test_compat_setting_CELERY_BACKEND(self): + self.app._preconf = {} # removes result backend set by AppCase + self.app.config_from_object(Object(CELERY_BACKEND='set_by_us')) + self.assertEqual(self.app.conf.CELERY_RESULT_BACKEND, 'set_by_us') + + def test_setting_BROKER_TRANSPORT_OPTIONS(self): + + _args = {'foo': 'bar', 'spam': 'baz'} + + self.app.config_from_object(Object()) + self.assertEqual(self.app.conf.BROKER_TRANSPORT_OPTIONS, {}) + + self.app.config_from_object(Object(BROKER_TRANSPORT_OPTIONS=_args)) + self.assertEqual(self.app.conf.BROKER_TRANSPORT_OPTIONS, _args) + + def test_Windows_log_color_disabled(self): + self.app.IS_WINDOWS = True + self.assertFalse(self.app.log.supports_color(True)) + + def test_compat_setting_CARROT_BACKEND(self): + self.app.config_from_object(Object(CARROT_BACKEND='set_by_us')) + self.assertEqual(self.app.conf.BROKER_TRANSPORT, 'set_by_us') + + def test_WorkController(self): + x = self.app.WorkController + self.assertIs(x.app, self.app) + + def test_Worker(self): + x = self.app.Worker + self.assertIs(x.app, self.app) + + @depends_on_current_app + def test_AsyncResult(self): + x = self.app.AsyncResult('1') + self.assertIs(x.app, self.app) + r = loads(dumps(x)) + # not set as current, so ends up as default app after reduce + self.assertIs(r.app, current_app._get_current_object()) + + def test_get_active_apps(self): + self.assertTrue(list(_state._get_active_apps())) + + app1 = self.Celery() + appid = id(app1) + self.assertIn(app1, _state._get_active_apps()) + app1.close() + del(app1) + + gc.collect() + + # weakref removed from list when app goes out of scope. + with self.assertRaises(StopIteration): + next(app for app in _state._get_active_apps() if id(app) == appid) + + def test_config_from_envvar_more(self, key='CELERY_HARNESS_CFG1'): + self.assertFalse( + self.app.config_from_envvar( + 'HDSAJIHWIQHEWQU', force=True, silent=True), + ) + with self.assertRaises(ImproperlyConfigured): + self.app.config_from_envvar( + 'HDSAJIHWIQHEWQU', force=True, silent=False, + ) + os.environ[key] = __name__ + '.object_config' + self.assertTrue(self.app.config_from_envvar(key, force=True)) + self.assertEqual(self.app.conf['FOO'], 1) + self.assertEqual(self.app.conf['BAR'], 2) + + os.environ[key] = 'unknown_asdwqe.asdwqewqe' + with self.assertRaises(ImportError): + self.app.config_from_envvar(key, silent=False) + self.assertFalse( + self.app.config_from_envvar(key, force=True, silent=True), + ) + + os.environ[key] = __name__ + '.dict_config' + self.assertTrue(self.app.config_from_envvar(key, force=True)) + self.assertEqual(self.app.conf['FOO'], 10) + self.assertEqual(self.app.conf['BAR'], 20) + + @patch('celery.bin.celery.CeleryCommand.execute_from_commandline') + def test_start(self, execute): + self.app.start() + self.assertTrue(execute.called) + + def test_mail_admins(self): + + class Loader(BaseLoader): + + def mail_admins(*args, **kwargs): + return args, kwargs + + self.app.loader = Loader(app=self.app) + self.app.conf.ADMINS = None + self.assertFalse(self.app.mail_admins('Subject', 'Body')) + self.app.conf.ADMINS = [('George Costanza', 'george@vandelay.com')] + self.assertTrue(self.app.mail_admins('Subject', 'Body')) + + def test_amqp_get_broker_info(self): + self.assertDictContainsSubset( + {'hostname': 'localhost', + 'userid': 'guest', + 'password': 'guest', + 'virtual_host': '/'}, + 
self.app.connection('pyamqp://').info(), + ) + self.app.conf.BROKER_PORT = 1978 + self.app.conf.BROKER_VHOST = 'foo' + self.assertDictContainsSubset( + {'port': 1978, 'virtual_host': 'foo'}, + self.app.connection('pyamqp://:1978/foo').info(), + ) + conn = self.app.connection('pyamqp:////value') + self.assertDictContainsSubset({'virtual_host': '/value'}, + conn.info()) + + def test_amqp_failover_strategy_selection(self): + # Test passing in a string and make sure the string + # gets there untouched + self.app.conf.BROKER_FAILOVER_STRATEGY = 'foo-bar' + self.assertEqual( + self.app.connection('amqp:////value').failover_strategy, + 'foo-bar', + ) + + # Try passing in None + self.app.conf.BROKER_FAILOVER_STRATEGY = None + self.assertEqual( + self.app.connection('amqp:////value').failover_strategy, + itertools.cycle, + ) + + # Test passing in a method + def my_failover_strategy(it): + yield True + + self.app.conf.BROKER_FAILOVER_STRATEGY = my_failover_strategy + self.assertEqual( + self.app.connection('amqp:////value').failover_strategy, + my_failover_strategy, + ) + + def test_BROKER_BACKEND_alias(self): + self.assertEqual(self.app.conf.BROKER_BACKEND, + self.app.conf.BROKER_TRANSPORT) + + def test_after_fork(self): + p = self.app._pool = Mock() + self.app._after_fork(self.app) + p.force_close_all.assert_called_with() + self.assertIsNone(self.app._pool) + self.app._after_fork(self.app) + + def test_pool_no_multiprocessing(self): + with mask_modules('multiprocessing.util'): + pool = self.app.pool + self.assertIs(pool, self.app._pool) + + def test_bugreport(self): + self.assertTrue(self.app.bugreport()) + + def test_send_task_sent_event(self): + + class Dispatcher(object): + sent = [] + + def publish(self, type, fields, *args, **kwargs): + self.sent.append((type, fields)) + + conn = self.app.connection() + chan = conn.channel() + try: + for e in ('foo_exchange', 'moo_exchange', 'bar_exchange'): + chan.exchange_declare(e, 'direct', durable=True) + chan.queue_declare(e, durable=True) + chan.queue_bind(e, e, e) + finally: + chan.close() + assert conn.transport_cls == 'memory' + + prod = self.app.amqp.TaskProducer( + conn, exchange=Exchange('foo_exchange'), + send_sent_event=True, + ) + + dispatcher = Dispatcher() + self.assertTrue(prod.publish_task('footask', (), {}, + exchange='moo_exchange', + routing_key='moo_exchange', + event_dispatcher=dispatcher)) + self.assertTrue(dispatcher.sent) + self.assertEqual(dispatcher.sent[0][0], 'task-sent') + self.assertTrue(prod.publish_task('footask', (), {}, + event_dispatcher=dispatcher, + exchange='bar_exchange', + routing_key='bar_exchange')) + + def test_error_mail_sender(self): + x = ErrorMail.subject % {'name': 'task_name', + 'id': uuid(), + 'exc': 'FOOBARBAZ', + 'hostname': 'lana'} + self.assertTrue(x) + + def test_error_mail_disabled(self): + task = Mock() + x = ErrorMail(task) + x.should_send = Mock() + x.should_send.return_value = False + x.send(Mock(), Mock()) + self.assertFalse(task.app.mail_admins.called) + + +class test_defaults(AppCase): + + def test_strtobool(self): + for s in ('false', 'no', '0'): + self.assertFalse(defaults.strtobool(s)) + for s in ('true', 'yes', '1'): + self.assertTrue(defaults.strtobool(s)) + with self.assertRaises(TypeError): + defaults.strtobool('unsure') + + +class test_debugging_utils(AppCase): + + def test_enable_disable_trace(self): + try: + _app.enable_trace() + self.assertEqual(_app.app_or_default, _app._app_or_default_trace) + _app.disable_trace() + self.assertEqual(_app.app_or_default, _app._app_or_default) + 
finally: + _app.disable_trace() + + +class test_pyimplementation(AppCase): + + def test_platform_python_implementation(self): + with platform_pyimp(lambda: 'Xython'): + self.assertEqual(pyimplementation(), 'Xython') + + def test_platform_jython(self): + with platform_pyimp(): + with sys_platform('java 1.6.51'): + self.assertIn('Jython', pyimplementation()) + + def test_platform_pypy(self): + with platform_pyimp(): + with sys_platform('darwin'): + with pypy_version((1, 4, 3)): + self.assertIn('PyPy', pyimplementation()) + with pypy_version((1, 4, 3, 'a4')): + self.assertIn('PyPy', pyimplementation()) + + def test_platform_fallback(self): + with platform_pyimp(): + with sys_platform('darwin'): + with pypy_version(): + self.assertEqual('CPython', pyimplementation()) + + +class test_shared_task(AppCase): + + def test_registers_to_all_apps(self): + with self.Celery('xproj', set_as_current=True) as xproj: + xproj.finalize() + + @shared_task + def foo(): + return 42 + + @shared_task() + def bar(): + return 84 + + self.assertIs(foo.app, xproj) + self.assertIs(bar.app, xproj) + self.assertTrue(foo._get_current_object()) + + with self.Celery('yproj', set_as_current=True) as yproj: + self.assertIs(foo.app, yproj) + self.assertIs(bar.app, yproj) + + @shared_task() + def baz(): + return 168 + + self.assertIs(baz.app, yproj) diff --git a/celery/tests/app/test_beat.py b/celery/tests/app/test_beat.py new file mode 100644 index 0000000..6231080 --- /dev/null +++ b/celery/tests/app/test_beat.py @@ -0,0 +1,539 @@ +from __future__ import absolute_import + +import errno + +from datetime import datetime, timedelta +from pickle import dumps, loads + +from celery import beat +from celery.five import keys, string_t +from celery.schedules import schedule +from celery.utils import uuid +from celery.tests.case import AppCase, Mock, SkipTest, call, patch + + +class Object(object): + pass + + +class MockShelve(dict): + closed = False + synced = False + + def close(self): + self.closed = True + + def sync(self): + self.synced = True + + +class MockService(object): + started = False + stopped = False + + def __init__(self, *args, **kwargs): + pass + + def start(self, **kwargs): + self.started = True + + def stop(self, **kwargs): + self.stopped = True + + +class test_ScheduleEntry(AppCase): + Entry = beat.ScheduleEntry + + def create_entry(self, **kwargs): + entry = dict( + name='celery.unittest.add', + schedule=timedelta(seconds=10), + args=(2, 2), + options={'routing_key': 'cpu'}, + app=self.app, + ) + return self.Entry(**dict(entry, **kwargs)) + + def test_next(self): + entry = self.create_entry(schedule=10) + self.assertTrue(entry.last_run_at) + self.assertIsInstance(entry.last_run_at, datetime) + self.assertEqual(entry.total_run_count, 0) + + next_run_at = entry.last_run_at + timedelta(seconds=10) + next_entry = entry.next(next_run_at) + self.assertGreaterEqual(next_entry.last_run_at, next_run_at) + self.assertEqual(next_entry.total_run_count, 1) + + def test_is_due(self): + entry = self.create_entry(schedule=timedelta(seconds=10)) + self.assertIs(entry.app, self.app) + self.assertIs(entry.schedule.app, self.app) + due1, next_time_to_run1 = entry.is_due() + self.assertFalse(due1) + self.assertGreater(next_time_to_run1, 9) + + next_run_at = entry.last_run_at - timedelta(seconds=10) + next_entry = entry.next(next_run_at) + due2, next_time_to_run2 = next_entry.is_due() + self.assertTrue(due2) + self.assertGreater(next_time_to_run2, 9) + + def test_repr(self): + entry = self.create_entry() + self.assertIn(' 1: + return 
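For reference, the schedule entry exercised above corresponds to a :setting:`CELERYBEAT_SCHEDULE` entry of roughly this shape (the entry name is illustrative):

.. code-block:: python

    from datetime import timedelta

    CELERYBEAT_SCHEDULE = {
        'add-every-10-seconds': {            # hypothetical entry name
            'task': 'celery.unittest.add',
            'schedule': timedelta(seconds=10),
            'args': (2, 2),
            'options': {'routing_key': 'cpu'},
        },
    }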
s.sh + raise OSError() + opens.side_effect = effect + s.setup_schedule() + s._remove_db.assert_called_with() + + s._store = {'__version__': 1} + s.setup_schedule() + + s._store.clear = Mock() + op = s.persistence.open = Mock() + op.return_value = s._store + s._store['tz'] = 'FUNKY' + s.setup_schedule() + op.assert_called_with(s.schedule_filename, writeback=True) + s._store.clear.assert_called_with() + s._store['utc_enabled'] = False + s._store.clear = Mock() + s.setup_schedule() + s._store.clear.assert_called_with() + + def test_get_schedule(self): + s = create_persistent_scheduler()[0]( + schedule_filename='schedule', app=self.app, + ) + s._store = {'entries': {}} + s.schedule = {'foo': 'bar'} + self.assertDictEqual(s.schedule, {'foo': 'bar'}) + self.assertDictEqual(s._store['entries'], s.schedule) + + +class test_Service(AppCase): + + def get_service(self): + Scheduler, mock_shelve = create_persistent_scheduler() + return beat.Service(app=self.app, scheduler_cls=Scheduler), mock_shelve + + def test_pickleable(self): + s = beat.Service(app=self.app, scheduler_cls=Mock) + self.assertTrue(loads(dumps(s))) + + def test_start(self): + s, sh = self.get_service() + schedule = s.scheduler.schedule + self.assertIsInstance(schedule, dict) + self.assertIsInstance(s.scheduler, beat.Scheduler) + scheduled = list(schedule.keys()) + for task_name in keys(sh['entries']): + self.assertIn(task_name, scheduled) + + s.sync() + self.assertTrue(sh.closed) + self.assertTrue(sh.synced) + self.assertTrue(s._is_stopped.isSet()) + s.sync() + s.stop(wait=False) + self.assertTrue(s._is_shutdown.isSet()) + s.stop(wait=True) + self.assertTrue(s._is_shutdown.isSet()) + + p = s.scheduler._store + s.scheduler._store = None + try: + s.scheduler.sync() + finally: + s.scheduler._store = p + + def test_start_embedded_process(self): + s, sh = self.get_service() + s._is_shutdown.set() + s.start(embedded_process=True) + + def test_start_thread(self): + s, sh = self.get_service() + s._is_shutdown.set() + s.start(embedded_process=False) + + def test_start_tick_raises_exit_error(self): + s, sh = self.get_service() + s.scheduler.tick_raises_exit = True + s.start() + self.assertTrue(s._is_shutdown.isSet()) + + def test_start_manages_one_tick_before_shutdown(self): + s, sh = self.get_service() + s.scheduler.shutdown_service = s + s.start() + self.assertTrue(s._is_shutdown.isSet()) + + +class test_EmbeddedService(AppCase): + + def test_start_stop_process(self): + try: + import _multiprocessing # noqa + except ImportError: + raise SkipTest('multiprocessing not available') + + from billiard.process import Process + + s = beat.EmbeddedService(app=self.app) + self.assertIsInstance(s, Process) + self.assertIsInstance(s.service, beat.Service) + s.service = MockService() + + class _Popen(object): + terminated = False + + def terminate(self): + self.terminated = True + + with patch('celery.platforms.close_open_fds'): + s.run() + self.assertTrue(s.service.started) + + s._popen = _Popen() + s.stop() + self.assertTrue(s.service.stopped) + self.assertTrue(s._popen.terminated) + + def test_start_stop_threaded(self): + s = beat.EmbeddedService(thread=True, app=self.app) + from threading import Thread + self.assertIsInstance(s, Thread) + self.assertIsInstance(s.service, beat.Service) + s.service = MockService() + + s.run() + self.assertTrue(s.service.started) + + s.stop() + self.assertTrue(s.service.stopped) + + +class test_schedule(AppCase): + + def test_maybe_make_aware(self): + x = schedule(10, app=self.app) + x.utc_enabled = True + d = 
x.maybe_make_aware(datetime.utcnow()) + self.assertTrue(d.tzinfo) + x.utc_enabled = False + d2 = x.maybe_make_aware(datetime.utcnow()) + self.assertIsNone(d2.tzinfo) + + def test_to_local(self): + x = schedule(10, app=self.app) + x.utc_enabled = True + d = x.to_local(datetime.utcnow()) + self.assertIsNone(d.tzinfo) + x.utc_enabled = False + d = x.to_local(datetime.utcnow()) + self.assertTrue(d.tzinfo) diff --git a/celery/tests/app/test_builtins.py b/celery/tests/app/test_builtins.py new file mode 100644 index 0000000..9b00c1a --- /dev/null +++ b/celery/tests/app/test_builtins.py @@ -0,0 +1,217 @@ +from __future__ import absolute_import + +from celery import group, chord +from celery.app import builtins +from celery.canvas import Signature +from celery.five import range +from celery._state import _task_stack +from celery.tests.case import AppCase, Mock, patch + + +class BuiltinsCase(AppCase): + + def setup(self): + @self.app.task(shared=False) + def xsum(x): + return sum(x) + self.xsum = xsum + + @self.app.task(shared=False) + def add(x, y): + return x + y + self.add = add + + +class test_backend_cleanup(BuiltinsCase): + + def test_run(self): + self.app.backend.cleanup = Mock() + self.app.backend.cleanup.__name__ = 'cleanup' + cleanup_task = builtins.add_backend_cleanup_task(self.app) + cleanup_task() + self.assertTrue(self.app.backend.cleanup.called) + + +class test_map(BuiltinsCase): + + def test_run(self): + + @self.app.task(shared=False) + def map_mul(x): + return x[0] * x[1] + + res = self.app.tasks['celery.map']( + map_mul, [(2, 2), (4, 4), (8, 8)], + ) + self.assertEqual(res, [4, 16, 64]) + + +class test_starmap(BuiltinsCase): + + def test_run(self): + + @self.app.task(shared=False) + def smap_mul(x, y): + return x * y + + res = self.app.tasks['celery.starmap']( + smap_mul, [(2, 2), (4, 4), (8, 8)], + ) + self.assertEqual(res, [4, 16, 64]) + + +class test_chunks(BuiltinsCase): + + @patch('celery.canvas.chunks.apply_chunks') + def test_run(self, apply_chunks): + + @self.app.task(shared=False) + def chunks_mul(l): + return l + + self.app.tasks['celery.chunks']( + chunks_mul, [(2, 2), (4, 4), (8, 8)], 1, + ) + self.assertTrue(apply_chunks.called) + + +class test_group(BuiltinsCase): + + def setup(self): + self.task = builtins.add_group_task(self.app)() + super(test_group, self).setup() + + def test_apply_async_eager(self): + self.task.apply = Mock() + self.app.conf.CELERY_ALWAYS_EAGER = True + self.task.apply_async() + self.assertTrue(self.task.apply.called) + + def test_apply(self): + x = group([self.add.s(4, 4), self.add.s(8, 8)]) + x.name = self.task.name + res = x.apply() + self.assertEqual(res.get(), [8, 16]) + + def test_apply_async(self): + x = group([self.add.s(4, 4), self.add.s(8, 8)]) + x.apply_async() + + def test_apply_empty(self): + x = group(app=self.app) + x.apply() + res = x.apply_async() + self.assertFalse(res) + self.assertFalse(res.results) + + def test_apply_async_with_parent(self): + _task_stack.push(self.add) + try: + self.add.push_request(called_directly=False) + try: + assert not self.add.request.children + x = group([self.add.s(4, 4), self.add.s(8, 8)]) + res = x() + self.assertTrue(self.add.request.children) + self.assertIn(res, self.add.request.children) + self.assertEqual(len(self.add.request.children), 1) + finally: + self.add.pop_request() + finally: + _task_stack.pop() + + +class test_chain(BuiltinsCase): + + def setup(self): + BuiltinsCase.setup(self) + self.task = builtins.add_chain_task(self.app)() + + def test_apply_async(self): + c = self.add.s(2, 2) 
| self.add.s(4) | self.add.s(8) + result = c.apply_async() + self.assertTrue(result.parent) + self.assertTrue(result.parent.parent) + self.assertIsNone(result.parent.parent.parent) + + def test_group_to_chord(self): + c = ( + group(self.add.s(i, i) for i in range(5)) | + self.add.s(10) | + self.add.s(20) | + self.add.s(30) + ) + tasks, _ = c.type.prepare_steps((), c.tasks) + self.assertIsInstance(tasks[0], chord) + self.assertTrue(tasks[0].body.options['link']) + self.assertTrue(tasks[0].body.options['link'][0].options['link']) + + c2 = self.add.s(2, 2) | group(self.add.s(i, i) for i in range(10)) + tasks2, _ = c2.type.prepare_steps((), c2.tasks) + self.assertIsInstance(tasks2[1], group) + + def test_apply_options(self): + + class static(Signature): + + def clone(self, *args, **kwargs): + return self + + def s(*args, **kwargs): + return static(self.add, args, kwargs, type=self.add) + + c = s(2, 2) | s(4, 4) | s(8, 8) + r1 = c.apply_async(task_id='some_id') + self.assertEqual(r1.id, 'some_id') + + c.apply_async(group_id='some_group_id') + self.assertEqual(c.tasks[-1].options['group_id'], 'some_group_id') + + c.apply_async(chord='some_chord_id') + self.assertEqual(c.tasks[-1].options['chord'], 'some_chord_id') + + c.apply_async(link=[s(32)]) + self.assertListEqual(c.tasks[-1].options['link'], [s(32)]) + + c.apply_async(link_error=[s('error')]) + for task in c.tasks: + self.assertListEqual(task.options['link_error'], [s('error')]) + + +class test_chord(BuiltinsCase): + + def setup(self): + self.task = builtins.add_chord_task(self.app)() + super(test_chord, self).setup() + + def test_apply_async(self): + x = chord([self.add.s(i, i) for i in range(10)], body=self.xsum.s()) + r = x.apply_async() + self.assertTrue(r) + self.assertTrue(r.parent) + + def test_run_header_not_group(self): + self.task([self.add.s(i, i) for i in range(10)], self.xsum.s()) + + def test_forward_options(self): + body = self.xsum.s() + x = chord([self.add.s(i, i) for i in range(10)], body=body) + x._type = Mock() + x._type.app.conf.CELERY_ALWAYS_EAGER = False + x.apply_async(group_id='some_group_id') + self.assertTrue(x._type.called) + resbody = x._type.call_args[0][1] + self.assertEqual(resbody.options['group_id'], 'some_group_id') + x2 = chord([self.add.s(i, i) for i in range(10)], body=body) + x2._type = Mock() + x2._type.app.conf.CELERY_ALWAYS_EAGER = False + x2.apply_async(chord='some_chord_id') + self.assertTrue(x2._type.called) + resbody = x2._type.call_args[0][1] + self.assertEqual(resbody.options['chord'], 'some_chord_id') + + def test_apply_eager(self): + self.app.conf.CELERY_ALWAYS_EAGER = True + x = chord([self.add.s(i, i) for i in range(10)], body=self.xsum.s()) + r = x.apply_async() + self.assertEqual(r.get(), 90) diff --git a/celery/tests/app/test_celery.py b/celery/tests/app/test_celery.py new file mode 100644 index 0000000..5088d35 --- /dev/null +++ b/celery/tests/app/test_celery.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import +from celery.tests.case import AppCase + +import celery + + +class test_celery_package(AppCase): + + def test_version(self): + self.assertTrue(celery.VERSION) + self.assertGreaterEqual(len(celery.VERSION), 3) + celery.VERSION = (0, 3, 0) + self.assertGreaterEqual(celery.__version__.count('.'), 2) + + def test_meta(self): + for m in ('__author__', '__contact__', '__homepage__', + '__docformat__'): + self.assertTrue(getattr(celery, m, None)) diff --git a/celery/tests/app/test_control.py b/celery/tests/app/test_control.py new file mode 100644 index 0000000..ad4bc82 --- 
/dev/null +++ b/celery/tests/app/test_control.py @@ -0,0 +1,251 @@ +from __future__ import absolute_import + +import warnings + +from functools import wraps + +from kombu.pidbox import Mailbox + +from celery.app import control +from celery.utils import uuid +from celery.tests.case import AppCase + + +class MockMailbox(Mailbox): + sent = [] + + def _publish(self, command, *args, **kwargs): + self.__class__.sent.append(command) + + def close(self): + pass + + def _collect(self, *args, **kwargs): + pass + + +class Control(control.Control): + Mailbox = MockMailbox + + +def with_mock_broadcast(fun): + + @wraps(fun) + def _resets(*args, **kwargs): + MockMailbox.sent = [] + try: + return fun(*args, **kwargs) + finally: + MockMailbox.sent = [] + return _resets + + +class test_flatten_reply(AppCase): + + def test_flatten_reply(self): + reply = [ + {'foo@example.com': {'hello': 10}}, + {'foo@example.com': {'hello': 20}}, + {'bar@example.com': {'hello': 30}} + ] + with warnings.catch_warnings(record=True) as w: + nodes = control.flatten_reply(reply) + self.assertIn( + 'multiple replies', + str(w[-1].message), + ) + self.assertIn('foo@example.com', nodes) + self.assertIn('bar@example.com', nodes) + + +class test_inspect(AppCase): + + def setup(self): + self.c = Control(app=self.app) + self.prev, self.app.control = self.app.control, self.c + self.i = self.c.inspect() + + def test_prepare_reply(self): + self.assertDictEqual(self.i._prepare([{'w1': {'ok': 1}}, + {'w2': {'ok': 1}}]), + {'w1': {'ok': 1}, 'w2': {'ok': 1}}) + + i = self.c.inspect(destination='w1') + self.assertEqual(i._prepare([{'w1': {'ok': 1}}]), + {'ok': 1}) + + @with_mock_broadcast + def test_active(self): + self.i.active() + self.assertIn('dump_active', MockMailbox.sent) + + @with_mock_broadcast + def test_clock(self): + self.i.clock() + self.assertIn('clock', MockMailbox.sent) + + @with_mock_broadcast + def test_conf(self): + self.i.conf() + self.assertIn('dump_conf', MockMailbox.sent) + + @with_mock_broadcast + def test_hello(self): + self.i.hello('george@vandelay.com') + self.assertIn('hello', MockMailbox.sent) + + @with_mock_broadcast + def test_memsample(self): + self.i.memsample() + self.assertIn('memsample', MockMailbox.sent) + + @with_mock_broadcast + def test_memdump(self): + self.i.memdump() + self.assertIn('memdump', MockMailbox.sent) + + @with_mock_broadcast + def test_objgraph(self): + self.i.objgraph() + self.assertIn('objgraph', MockMailbox.sent) + + @with_mock_broadcast + def test_scheduled(self): + self.i.scheduled() + self.assertIn('dump_schedule', MockMailbox.sent) + + @with_mock_broadcast + def test_reserved(self): + self.i.reserved() + self.assertIn('dump_reserved', MockMailbox.sent) + + @with_mock_broadcast + def test_stats(self): + self.i.stats() + self.assertIn('stats', MockMailbox.sent) + + @with_mock_broadcast + def test_revoked(self): + self.i.revoked() + self.assertIn('dump_revoked', MockMailbox.sent) + + @with_mock_broadcast + def test_tasks(self): + self.i.registered() + self.assertIn('dump_tasks', MockMailbox.sent) + + @with_mock_broadcast + def test_ping(self): + self.i.ping() + self.assertIn('ping', MockMailbox.sent) + + @with_mock_broadcast + def test_active_queues(self): + self.i.active_queues() + self.assertIn('active_queues', MockMailbox.sent) + + @with_mock_broadcast + def test_report(self): + self.i.report() + self.assertIn('report', MockMailbox.sent) + + +class test_Broadcast(AppCase): + + def setup(self): + self.control = Control(app=self.app) + self.app.control = self.control + + 
@self.app.task(shared=False) + def mytask(): + pass + self.mytask = mytask + + def test_purge(self): + self.control.purge() + + @with_mock_broadcast + def test_broadcast(self): + self.control.broadcast('foobarbaz', arguments=[]) + self.assertIn('foobarbaz', MockMailbox.sent) + + @with_mock_broadcast + def test_broadcast_limit(self): + self.control.broadcast( + 'foobarbaz1', arguments=[], limit=None, destination=[1, 2, 3], + ) + self.assertIn('foobarbaz1', MockMailbox.sent) + + @with_mock_broadcast + def test_broadcast_validate(self): + with self.assertRaises(ValueError): + self.control.broadcast('foobarbaz2', + destination='foo') + + @with_mock_broadcast + def test_rate_limit(self): + self.control.rate_limit(self.mytask.name, '100/m') + self.assertIn('rate_limit', MockMailbox.sent) + + @with_mock_broadcast + def test_time_limit(self): + self.control.time_limit(self.mytask.name, soft=10, hard=20) + self.assertIn('time_limit', MockMailbox.sent) + + @with_mock_broadcast + def test_add_consumer(self): + self.control.add_consumer('foo') + self.assertIn('add_consumer', MockMailbox.sent) + + @with_mock_broadcast + def test_cancel_consumer(self): + self.control.cancel_consumer('foo') + self.assertIn('cancel_consumer', MockMailbox.sent) + + @with_mock_broadcast + def test_enable_events(self): + self.control.enable_events() + self.assertIn('enable_events', MockMailbox.sent) + + @with_mock_broadcast + def test_disable_events(self): + self.control.disable_events() + self.assertIn('disable_events', MockMailbox.sent) + + @with_mock_broadcast + def test_revoke(self): + self.control.revoke('foozbaaz') + self.assertIn('revoke', MockMailbox.sent) + + @with_mock_broadcast + def test_ping(self): + self.control.ping() + self.assertIn('ping', MockMailbox.sent) + + @with_mock_broadcast + def test_election(self): + self.control.election('some_id', 'topic', 'action') + self.assertIn('election', MockMailbox.sent) + + @with_mock_broadcast + def test_pool_grow(self): + self.control.pool_grow(2) + self.assertIn('pool_grow', MockMailbox.sent) + + @with_mock_broadcast + def test_pool_shrink(self): + self.control.pool_shrink(2) + self.assertIn('pool_shrink', MockMailbox.sent) + + @with_mock_broadcast + def test_revoke_from_result(self): + self.app.AsyncResult('foozbazzbar').revoke() + self.assertIn('revoke', MockMailbox.sent) + + @with_mock_broadcast + def test_revoke_from_resultset(self): + r = self.app.GroupResult(uuid(), + [self.app.AsyncResult(x) + for x in [uuid() for i in range(10)]]) + r.revoke() + self.assertIn('revoke', MockMailbox.sent) diff --git a/celery/tests/app/test_defaults.py b/celery/tests/app/test_defaults.py new file mode 100644 index 0000000..bf87f80 --- /dev/null +++ b/celery/tests/app/test_defaults.py @@ -0,0 +1,60 @@ +from __future__ import absolute_import + +import sys + +from importlib import import_module + +from celery.app.defaults import NAMESPACES + +from celery.tests.case import ( + AppCase, Mock, patch, pypy_version, sys_platform, +) + + +class test_defaults(AppCase): + + def setup(self): + self._prev = sys.modules.pop('celery.app.defaults', None) + + def teardown(self): + if self._prev: + sys.modules['celery.app.defaults'] = self._prev + + def test_option_repr(self): + self.assertTrue(repr(NAMESPACES['BROKER']['URL'])) + + def test_any(self): + val = object() + self.assertIs(self.defaults.Option.typemap['any'](val), val) + + def test_default_pool_pypy_14(self): + with sys_platform('darwin'): + with pypy_version((1, 4, 0)): + self.assertEqual(self.defaults.DEFAULT_POOL, 'solo') + + def 
test_default_pool_pypy_15(self): + with sys_platform('darwin'): + with pypy_version((1, 5, 0)): + self.assertEqual(self.defaults.DEFAULT_POOL, 'prefork') + + def test_deprecated(self): + source = Mock() + source.CELERYD_LOG_LEVEL = 2 + with patch('celery.utils.warn_deprecated') as warn: + self.defaults.find_deprecated_settings(source) + self.assertTrue(warn.called) + + def test_default_pool_jython(self): + with sys_platform('java 1.6.51'): + self.assertEqual(self.defaults.DEFAULT_POOL, 'threads') + + def test_find(self): + find = self.defaults.find + + self.assertEqual(find('server_email')[2].default, 'celery@localhost') + self.assertEqual(find('default_queue')[2].default, 'celery') + self.assertEqual(find('celery_default_exchange')[2], 'celery') + + @property + def defaults(self): + return import_module('celery.app.defaults') diff --git a/celery/tests/app/test_exceptions.py b/celery/tests/app/test_exceptions.py new file mode 100644 index 0000000..25d2b4e --- /dev/null +++ b/celery/tests/app/test_exceptions.py @@ -0,0 +1,35 @@ +from __future__ import absolute_import + +import pickle + +from datetime import datetime + +from celery.exceptions import Reject, Retry + +from celery.tests.case import AppCase + + +class test_Retry(AppCase): + + def test_when_datetime(self): + x = Retry('foo', KeyError(), when=datetime.utcnow()) + self.assertTrue(x.humanize()) + + def test_pickleable(self): + x = Retry('foo', KeyError(), when=datetime.utcnow()) + self.assertTrue(pickle.loads(pickle.dumps(x))) + + +class test_Reject(AppCase): + + def test_attrs(self): + x = Reject('foo', requeue=True) + self.assertEqual(x.reason, 'foo') + self.assertTrue(x.requeue) + + def test_repr(self): + self.assertTrue(repr(Reject('foo', True))) + + def test_pickleable(self): + x = Retry('foo', True) + self.assertTrue(pickle.loads(pickle.dumps(x))) diff --git a/celery/tests/app/test_loaders.py b/celery/tests/app/test_loaders.py new file mode 100644 index 0000000..ab69e50 --- /dev/null +++ b/celery/tests/app/test_loaders.py @@ -0,0 +1,271 @@ +from __future__ import absolute_import + +import os +import sys +import warnings + +from celery import loaders +from celery.exceptions import ( + NotConfigured, +) +from celery.loaders import base +from celery.loaders import default +from celery.loaders.app import AppLoader +from celery.utils.imports import NotAPackage +from celery.utils.mail import SendmailWarning + +from celery.tests.case import ( + AppCase, Case, Mock, depends_on_current_app, patch, with_environ, +) + + +class DummyLoader(base.BaseLoader): + + def read_configuration(self): + return {'foo': 'bar', 'CELERY_IMPORTS': ('os', 'sys')} + + +class test_loaders(AppCase): + + def test_get_loader_cls(self): + self.assertEqual(loaders.get_loader_cls('default'), + default.Loader) + + @depends_on_current_app + def test_current_loader(self): + with self.assertPendingDeprecation(): + self.assertIs(loaders.current_loader(), self.app.loader) + + @depends_on_current_app + def test_load_settings(self): + with self.assertPendingDeprecation(): + self.assertIs(loaders.load_settings(), self.app.conf) + + +class test_LoaderBase(AppCase): + message_options = {'subject': 'Subject', + 'body': 'Body', + 'sender': 'x@x.com', + 'to': 'y@x.com'} + server_options = {'host': 'smtp.x.com', + 'port': 1234, + 'user': 'x', + 'password': 'qwerty', + 'timeout': 3} + + def setup(self): + self.loader = DummyLoader(app=self.app) + + def test_handlers_pass(self): + self.loader.on_task_init('foo.task', 'feedface-cafebabe') + self.loader.on_worker_init() + + def 
test_now(self): + self.assertTrue(self.loader.now(utc=True)) + self.assertTrue(self.loader.now(utc=False)) + + def test_read_configuration_no_env(self): + self.assertDictEqual( + base.BaseLoader(app=self.app).read_configuration( + 'FOO_X_S_WE_WQ_Q_WE'), + {}, + ) + + def test_autodiscovery(self): + with patch('celery.loaders.base.autodiscover_tasks') as auto: + auto.return_value = [Mock()] + auto.return_value[0].__name__ = 'moo' + self.loader.autodiscover_tasks(['A', 'B']) + self.assertIn('moo', self.loader.task_modules) + self.loader.task_modules.discard('moo') + + def test_import_task_module(self): + self.assertEqual(sys, self.loader.import_task_module('sys')) + + def test_init_worker_process(self): + self.loader.on_worker_process_init() + m = self.loader.on_worker_process_init = Mock() + self.loader.init_worker_process() + m.assert_called_with() + + def test_config_from_object_module(self): + self.loader.import_from_cwd = Mock() + self.loader.config_from_object('module_name') + self.loader.import_from_cwd.assert_called_with('module_name') + + def test_conf_property(self): + self.assertEqual(self.loader.conf['foo'], 'bar') + self.assertEqual(self.loader._conf['foo'], 'bar') + self.assertEqual(self.loader.conf['foo'], 'bar') + + def test_import_default_modules(self): + modnames = lambda l: [m.__name__ for m in l] + self.app.conf.CELERY_IMPORTS = ('os', 'sys') + self.assertEqual( + sorted(modnames(self.loader.import_default_modules())), + sorted(modnames([os, sys])), + ) + + def test_import_from_cwd_custom_imp(self): + + def imp(module, package=None): + imp.called = True + imp.called = False + + self.loader.import_from_cwd('foo', imp=imp) + self.assertTrue(imp.called) + + @patch('celery.utils.mail.Mailer._send') + def test_mail_admins_errors(self, send): + send.side_effect = KeyError() + opts = dict(self.message_options, **self.server_options) + + with self.assertWarnsRegex(SendmailWarning, r'KeyError'): + self.loader.mail_admins(fail_silently=True, **opts) + + with self.assertRaises(KeyError): + self.loader.mail_admins(fail_silently=False, **opts) + + @patch('celery.utils.mail.Mailer._send') + def test_mail_admins(self, send): + opts = dict(self.message_options, **self.server_options) + self.loader.mail_admins(**opts) + self.assertTrue(send.call_args) + message = send.call_args[0][0] + self.assertEqual(message.to, [self.message_options['to']]) + self.assertEqual(message.subject, self.message_options['subject']) + self.assertEqual(message.sender, self.message_options['sender']) + self.assertEqual(message.body, self.message_options['body']) + + def test_mail_attribute(self): + from celery.utils import mail + loader = base.BaseLoader(app=self.app) + self.assertIs(loader.mail, mail) + + def test_cmdline_config_ValueError(self): + with self.assertRaises(ValueError): + self.loader.cmdline_config_parser(['broker.port=foobar']) + + +class test_DefaultLoader(AppCase): + + @patch('celery.loaders.base.find_module') + def test_read_configuration_not_a_package(self, find_module): + find_module.side_effect = NotAPackage() + l = default.Loader(app=self.app) + with self.assertRaises(NotAPackage): + l.read_configuration(fail_silently=False) + + @patch('celery.loaders.base.find_module') + @with_environ('CELERY_CONFIG_MODULE', 'celeryconfig.py') + def test_read_configuration_py_in_name(self, find_module): + find_module.side_effect = NotAPackage() + l = default.Loader(app=self.app) + with self.assertRaises(NotAPackage): + l.read_configuration(fail_silently=False) + + @patch('celery.loaders.base.find_module') 
+ def test_read_configuration_importerror(self, find_module): + default.C_WNOCONF = True + find_module.side_effect = ImportError() + l = default.Loader(app=self.app) + with self.assertWarnsRegex(NotConfigured, r'make sure it exists'): + l.read_configuration(fail_silently=True) + default.C_WNOCONF = False + l.read_configuration(fail_silently=True) + + def test_read_configuration(self): + from types import ModuleType + + class ConfigModule(ModuleType): + pass + + configname = os.environ.get('CELERY_CONFIG_MODULE') or 'celeryconfig' + celeryconfig = ConfigModule(configname) + celeryconfig.CELERY_IMPORTS = ('os', 'sys') + + prevconfig = sys.modules.get(configname) + sys.modules[configname] = celeryconfig + try: + l = default.Loader(app=self.app) + l.find_module = Mock(name='find_module') + settings = l.read_configuration(fail_silently=False) + self.assertTupleEqual(settings.CELERY_IMPORTS, ('os', 'sys')) + settings = l.read_configuration(fail_silently=False) + self.assertTupleEqual(settings.CELERY_IMPORTS, ('os', 'sys')) + l.on_worker_init() + finally: + if prevconfig: + sys.modules[configname] = prevconfig + + def test_import_from_cwd(self): + l = default.Loader(app=self.app) + old_path = list(sys.path) + try: + sys.path.remove(os.getcwd()) + except ValueError: + pass + celery = sys.modules.pop('celery', None) + sys.modules.pop('celery.five', None) + try: + self.assertTrue(l.import_from_cwd('celery')) + sys.modules.pop('celery', None) + sys.modules.pop('celery.five', None) + sys.path.insert(0, os.getcwd()) + self.assertTrue(l.import_from_cwd('celery')) + finally: + sys.path = old_path + sys.modules['celery'] = celery + + def test_unconfigured_settings(self): + context_executed = [False] + + class _Loader(default.Loader): + + def find_module(self, name): + raise ImportError(name) + + with warnings.catch_warnings(record=True): + l = _Loader(app=self.app) + self.assertFalse(l.configured) + context_executed[0] = True + self.assertTrue(context_executed[0]) + + +class test_AppLoader(AppCase): + + def setup(self): + self.loader = AppLoader(app=self.app) + + def test_on_worker_init(self): + self.app.conf.CELERY_IMPORTS = ('subprocess', ) + sys.modules.pop('subprocess', None) + self.loader.init_worker() + self.assertIn('subprocess', sys.modules) + + +class test_autodiscovery(Case): + + def test_autodiscover_tasks(self): + base._RACE_PROTECTION = True + try: + base.autodiscover_tasks(['foo']) + finally: + base._RACE_PROTECTION = False + with patch('celery.loaders.base.find_related_module') as frm: + base.autodiscover_tasks(['foo']) + self.assertTrue(frm.called) + + def test_find_related_module(self): + with patch('importlib.import_module') as imp: + with patch('imp.find_module') as find: + imp.return_value = Mock() + imp.return_value.__path__ = 'foo' + base.find_related_module(base, 'tasks') + + imp.side_effect = AttributeError() + base.find_related_module(base, 'tasks') + imp.side_effect = None + + find.side_effect = ImportError() + base.find_related_module(base, 'tasks') diff --git a/celery/tests/app/test_log.py b/celery/tests/app/test_log.py new file mode 100644 index 0000000..588e39b --- /dev/null +++ b/celery/tests/app/test_log.py @@ -0,0 +1,385 @@ +from __future__ import absolute_import + +import sys +import logging + +from collections import defaultdict +from io import StringIO +from tempfile import mktemp + +from celery import signals +from celery.app.log import TaskFormatter +from celery.utils.log import LoggingProxy +from celery.utils import uuid +from celery.utils.log import ( + get_logger, 
+ ColorFormatter, + logger as base_logger, + get_task_logger, + task_logger, + in_sighandler, + logger_isa, + ensure_process_aware_logger, +) +from celery.tests.case import ( + AppCase, Mock, SkipTest, + get_handlers, override_stdouts, patch, wrap_logger, restore_logging, +) + + +class test_TaskFormatter(AppCase): + + def test_no_task(self): + class Record(object): + msg = 'hello world' + levelname = 'info' + exc_text = exc_info = None + stack_info = None + + def getMessage(self): + return self.msg + record = Record() + x = TaskFormatter() + x.format(record) + self.assertEqual(record.task_name, '???') + self.assertEqual(record.task_id, '???') + + +class test_logger_isa(AppCase): + + def test_isa(self): + x = get_task_logger('Z1george') + self.assertTrue(logger_isa(x, task_logger)) + prev_x, x.parent = x.parent, None + try: + self.assertFalse(logger_isa(x, task_logger)) + finally: + x.parent = prev_x + + y = get_task_logger('Z1elaine') + y.parent = x + self.assertTrue(logger_isa(y, task_logger)) + self.assertTrue(logger_isa(y, x)) + self.assertTrue(logger_isa(y, y)) + + z = get_task_logger('Z1jerry') + z.parent = y + self.assertTrue(logger_isa(z, task_logger)) + self.assertTrue(logger_isa(z, y)) + self.assertTrue(logger_isa(z, x)) + self.assertTrue(logger_isa(z, z)) + + def test_recursive(self): + x = get_task_logger('X1foo') + prev, x.parent = x.parent, x + try: + with self.assertRaises(RuntimeError): + logger_isa(x, task_logger) + finally: + x.parent = prev + + y = get_task_logger('X2foo') + z = get_task_logger('X2foo') + prev_y, y.parent = y.parent, z + try: + prev_z, z.parent = z.parent, y + try: + with self.assertRaises(RuntimeError): + logger_isa(y, task_logger) + finally: + z.parent = prev_z + finally: + y.parent = prev_y + + +class test_ColorFormatter(AppCase): + + @patch('celery.utils.log.safe_str') + @patch('logging.Formatter.formatException') + def test_formatException_not_string(self, fe, safe_str): + x = ColorFormatter() + value = KeyError() + fe.return_value = value + self.assertIs(x.formatException(value), value) + self.assertTrue(fe.called) + self.assertFalse(safe_str.called) + + @patch('logging.Formatter.formatException') + @patch('celery.utils.log.safe_str') + def test_formatException_string(self, safe_str, fe): + x = ColorFormatter() + fe.return_value = 'HELLO' + try: + raise Exception() + except Exception: + self.assertTrue(x.formatException(sys.exc_info())) + if sys.version_info[0] == 2: + self.assertTrue(safe_str.called) + + @patch('logging.Formatter.format') + def test_format_object(self, _format): + x = ColorFormatter() + x.use_color = True + record = Mock() + record.levelname = 'ERROR' + record.msg = object() + self.assertTrue(x.format(record)) + + @patch('celery.utils.log.safe_str') + def test_format_raises(self, safe_str): + x = ColorFormatter() + + def on_safe_str(s): + try: + raise ValueError('foo') + finally: + safe_str.side_effect = None + safe_str.side_effect = on_safe_str + + class Record(object): + levelname = 'ERROR' + msg = 'HELLO' + exc_info = 1 + exc_text = 'error text' + stack_info = None + + def __str__(self): + return on_safe_str('') + + def getMessage(self): + return self.msg + + record = Record() + safe_str.return_value = record + + msg = x.format(record) + self.assertIn('= 3: + raise + else: + break + + def assertRelativedelta(self, due, last_ran): + try: + from dateutil.relativedelta import relativedelta + except ImportError: + return + l1, d1, n1 = due.remaining_delta(last_ran) + l2, d2, n2 = due.remaining_delta(last_ran, ffwd=relativedelta) + 
if not isinstance(d1, relativedelta): + self.assertEqual(l1, l2) + for field, value in items(d1._fields()): + self.assertEqual(getattr(d1, field), value) + self.assertFalse(d2.years) + self.assertFalse(d2.months) + self.assertFalse(d2.days) + self.assertFalse(d2.leapdays) + self.assertFalse(d2.hours) + self.assertFalse(d2.minutes) + self.assertFalse(d2.seconds) + self.assertFalse(d2.microseconds) + + def test_every_minute_execution_is_due(self): + last_ran = self.now - timedelta(seconds=61) + due, remaining = self.every_minute.is_due(last_ran) + self.assertRelativedelta(self.every_minute, last_ran) + self.assertTrue(due) + self.seconds_almost_equal(remaining, self.next_minute, 1) + + def test_every_minute_execution_is_not_due(self): + last_ran = self.now - timedelta(seconds=self.now.second) + due, remaining = self.every_minute.is_due(last_ran) + self.assertFalse(due) + self.seconds_almost_equal(remaining, self.next_minute, 1) + + def test_execution_is_due_on_saturday(self): + # 29th of May 2010 is a saturday + with patch_crontab_nowfun(self.hourly, datetime(2010, 5, 29, 10, 30)): + last_ran = self.now - timedelta(seconds=61) + due, remaining = self.every_minute.is_due(last_ran) + self.assertTrue(due) + self.seconds_almost_equal(remaining, self.next_minute, 1) + + def test_execution_is_due_on_sunday(self): + # 30th of May 2010 is a sunday + with patch_crontab_nowfun(self.hourly, datetime(2010, 5, 30, 10, 30)): + last_ran = self.now - timedelta(seconds=61) + due, remaining = self.every_minute.is_due(last_ran) + self.assertTrue(due) + self.seconds_almost_equal(remaining, self.next_minute, 1) + + def test_execution_is_due_on_monday(self): + # 31st of May 2010 is a monday + with patch_crontab_nowfun(self.hourly, datetime(2010, 5, 31, 10, 30)): + last_ran = self.now - timedelta(seconds=61) + due, remaining = self.every_minute.is_due(last_ran) + self.assertTrue(due) + self.seconds_almost_equal(remaining, self.next_minute, 1) + + def test_every_hour_execution_is_due(self): + with patch_crontab_nowfun(self.hourly, datetime(2010, 5, 10, 10, 30)): + due, remaining = self.hourly.is_due(datetime(2010, 5, 10, 6, 30)) + self.assertTrue(due) + self.assertEqual(remaining, 60 * 60) + + def test_every_hour_execution_is_not_due(self): + with patch_crontab_nowfun(self.hourly, datetime(2010, 5, 10, 10, 29)): + due, remaining = self.hourly.is_due(datetime(2010, 5, 10, 9, 30)) + self.assertFalse(due) + self.assertEqual(remaining, 60) + + def test_first_quarter_execution_is_due(self): + with patch_crontab_nowfun( + self.quarterly, datetime(2010, 5, 10, 10, 15)): + due, remaining = self.quarterly.is_due( + datetime(2010, 5, 10, 6, 30), + ) + self.assertTrue(due) + self.assertEqual(remaining, 15 * 60) + + def test_second_quarter_execution_is_due(self): + with patch_crontab_nowfun( + self.quarterly, datetime(2010, 5, 10, 10, 30)): + due, remaining = self.quarterly.is_due( + datetime(2010, 5, 10, 6, 30), + ) + self.assertTrue(due) + self.assertEqual(remaining, 15 * 60) + + def test_first_quarter_execution_is_not_due(self): + with patch_crontab_nowfun( + self.quarterly, datetime(2010, 5, 10, 10, 14)): + due, remaining = self.quarterly.is_due( + datetime(2010, 5, 10, 10, 0), + ) + self.assertFalse(due) + self.assertEqual(remaining, 60) + + def test_second_quarter_execution_is_not_due(self): + with patch_crontab_nowfun( + self.quarterly, datetime(2010, 5, 10, 10, 29)): + due, remaining = self.quarterly.is_due( + datetime(2010, 5, 10, 10, 15), + ) + self.assertFalse(due) + self.assertEqual(remaining, 60) + + def 
test_daily_execution_is_due(self): + with patch_crontab_nowfun(self.daily, datetime(2010, 5, 10, 7, 30)): + due, remaining = self.daily.is_due(datetime(2010, 5, 9, 7, 30)) + self.assertTrue(due) + self.assertEqual(remaining, 24 * 60 * 60) + + def test_daily_execution_is_not_due(self): + with patch_crontab_nowfun(self.daily, datetime(2010, 5, 10, 10, 30)): + due, remaining = self.daily.is_due(datetime(2010, 5, 10, 7, 30)) + self.assertFalse(due) + self.assertEqual(remaining, 21 * 60 * 60) + + def test_weekly_execution_is_due(self): + with patch_crontab_nowfun(self.weekly, datetime(2010, 5, 6, 7, 30)): + due, remaining = self.weekly.is_due(datetime(2010, 4, 30, 7, 30)) + self.assertTrue(due) + self.assertEqual(remaining, 7 * 24 * 60 * 60) + + def test_weekly_execution_is_not_due(self): + with patch_crontab_nowfun(self.weekly, datetime(2010, 5, 7, 10, 30)): + due, remaining = self.weekly.is_due(datetime(2010, 5, 6, 7, 30)) + self.assertFalse(due) + self.assertEqual(remaining, 6 * 24 * 60 * 60 - 3 * 60 * 60) + + def test_monthly_execution_is_due(self): + with patch_crontab_nowfun(self.monthly, datetime(2010, 5, 13, 7, 30)): + due, remaining = self.monthly.is_due(datetime(2010, 4, 8, 7, 30)) + self.assertTrue(due) + self.assertEqual(remaining, 28 * 24 * 60 * 60) + + def test_monthly_execution_is_not_due(self): + with patch_crontab_nowfun(self.monthly, datetime(2010, 5, 9, 10, 30)): + due, remaining = self.monthly.is_due(datetime(2010, 4, 8, 7, 30)) + self.assertFalse(due) + self.assertEqual(remaining, 4 * 24 * 60 * 60 - 3 * 60 * 60) + + def test_monthly_moy_execution_is_due(self): + with patch_crontab_nowfun( + self.monthly_moy, datetime(2014, 2, 26, 22, 0)): + due, remaining = self.monthly_moy.is_due( + datetime(2013, 7, 4, 10, 0), + ) + self.assertTrue(due) + self.assertEqual(remaining, 60.) + + def test_monthly_moy_execution_is_not_due(self): + raise SkipTest('unstable test') + with patch_crontab_nowfun( + self.monthly_moy, datetime(2013, 6, 28, 14, 30)): + due, remaining = self.monthly_moy.is_due( + datetime(2013, 6, 28, 22, 14), + ) + self.assertFalse(due) + attempt = ( + time.mktime(datetime(2014, 2, 26, 22, 0).timetuple()) - + time.mktime(datetime(2013, 6, 28, 14, 30).timetuple()) - + 60 * 60 + ) + self.assertEqual(remaining, attempt) + + def test_monthly_moy_execution_is_due2(self): + with patch_crontab_nowfun( + self.monthly_moy, datetime(2014, 2, 26, 22, 0)): + due, remaining = self.monthly_moy.is_due( + datetime(2013, 2, 28, 10, 0), + ) + self.assertTrue(due) + self.assertEqual(remaining, 60.) 
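+
+    # Note: ``monthly_moy`` is assumed to be defined in this class's setup
+    # (not shown in this hunk) as a crontab schedule restricted by
+    # month_of_year.  ``is_due(last_run_at)`` returns a
+    # ``(is_due, seconds_until_next_check)`` pair, which the tests above and
+    # below unpack as ``due, remaining`` before asserting on both values.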
+ + def test_monthly_moy_execution_is_not_due2(self): + with patch_crontab_nowfun( + self.monthly_moy, datetime(2014, 2, 26, 21, 0)): + due, remaining = self.monthly_moy.is_due( + datetime(2013, 6, 28, 22, 14), + ) + self.assertFalse(due) + attempt = 60 * 60 + self.assertEqual(remaining, attempt) + + def test_yearly_execution_is_due(self): + with patch_crontab_nowfun(self.yearly, datetime(2010, 3, 11, 7, 30)): + due, remaining = self.yearly.is_due(datetime(2009, 3, 12, 7, 30)) + self.assertTrue(due) + self.assertEqual(remaining, 364 * 24 * 60 * 60) + + def test_yearly_execution_is_not_due(self): + with patch_crontab_nowfun(self.yearly, datetime(2010, 3, 7, 10, 30)): + due, remaining = self.yearly.is_due(datetime(2009, 3, 12, 7, 30)) + self.assertFalse(due) + self.assertEqual(remaining, 4 * 24 * 60 * 60 - 3 * 60 * 60) diff --git a/celery/tests/app/test_utils.py b/celery/tests/app/test_utils.py new file mode 100644 index 0000000..b0ff108 --- /dev/null +++ b/celery/tests/app/test_utils.py @@ -0,0 +1,46 @@ +from __future__ import absolute_import + +from collections import Mapping, MutableMapping + +from celery.app.utils import Settings, filter_hidden_settings, bugreport + +from celery.tests.case import AppCase, Mock + + +class TestSettings(AppCase): + """ + Tests of celery.app.utils.Settings + """ + def test_is_mapping(self): + """Settings should be a collections.Mapping""" + self.assertTrue(issubclass(Settings, Mapping)) + + def test_is_mutable_mapping(self): + """Settings should be a collections.MutableMapping""" + self.assertTrue(issubclass(Settings, MutableMapping)) + + +class test_filter_hidden_settings(AppCase): + + def test_handles_non_string_keys(self): + """filter_hidden_settings shouldn't raise an exception when handling + mappings with non-string keys""" + conf = { + 'STRING_KEY': 'VALUE1', + ('NON', 'STRING', 'KEY'): 'VALUE2', + 'STRING_KEY2': { + 'STRING_KEY3': 1, + ('NON', 'STRING', 'KEY', '2'): 2 + }, + } + filter_hidden_settings(conf) + + +class test_bugreport(AppCase): + + def test_no_conn_driver_info(self): + self.app.connection = Mock() + conn = self.app.connection.return_value = Mock() + conn.transport = None + + bugreport(self.app) diff --git a/celery/tests/backends/__init__.py b/celery/tests/backends/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/tests/backends/test_amqp.py b/celery/tests/backends/test_amqp.py new file mode 100644 index 0000000..7bfa7c1 --- /dev/null +++ b/celery/tests/backends/test_amqp.py @@ -0,0 +1,358 @@ +from __future__ import absolute_import + +import pickle +import socket + +from contextlib import contextmanager +from datetime import timedelta +from pickle import dumps, loads + +from billiard.einfo import ExceptionInfo + +from celery import states +from celery.backends.amqp import AMQPBackend +from celery.exceptions import TimeoutError +from celery.five import Empty, Queue, range +from celery.utils import uuid + +from celery.tests.case import ( + AppCase, Mock, depends_on_current_app, patch, sleepdeprived, +) + + +class SomeClass(object): + + def __init__(self, data): + self.data = data + + +class test_AMQPBackend(AppCase): + + def create_backend(self, **opts): + opts = dict(dict(serializer='pickle', persistent=True), **opts) + return AMQPBackend(self.app, **opts) + + def test_mark_as_done(self): + tb1 = self.create_backend(max_cached_results=1) + tb2 = self.create_backend(max_cached_results=1) + + tid = uuid() + + tb1.mark_as_done(tid, 42) + self.assertEqual(tb2.get_status(tid), states.SUCCESS) + 
self.assertEqual(tb2.get_result(tid), 42) + self.assertTrue(tb2._cache.get(tid)) + self.assertTrue(tb2.get_result(tid), 42) + + @depends_on_current_app + def test_pickleable(self): + self.assertTrue(loads(dumps(self.create_backend()))) + + def test_revive(self): + tb = self.create_backend() + tb.revive(None) + + def test_is_pickled(self): + tb1 = self.create_backend() + tb2 = self.create_backend() + + tid2 = uuid() + result = {'foo': 'baz', 'bar': SomeClass(12345)} + tb1.mark_as_done(tid2, result) + # is serialized properly. + rindb = tb2.get_result(tid2) + self.assertEqual(rindb.get('foo'), 'baz') + self.assertEqual(rindb.get('bar').data, 12345) + + def test_mark_as_failure(self): + tb1 = self.create_backend() + tb2 = self.create_backend() + + tid3 = uuid() + try: + raise KeyError('foo') + except KeyError as exception: + einfo = ExceptionInfo() + tb1.mark_as_failure(tid3, exception, traceback=einfo.traceback) + self.assertEqual(tb2.get_status(tid3), states.FAILURE) + self.assertIsInstance(tb2.get_result(tid3), KeyError) + self.assertEqual(tb2.get_traceback(tid3), einfo.traceback) + + def test_repair_uuid(self): + from celery.backends.amqp import repair_uuid + for i in range(10): + tid = uuid() + self.assertEqual(repair_uuid(tid.replace('-', '')), tid) + + def test_expires_is_int(self): + b = self.create_backend(expires=48) + self.assertEqual(b.queue_arguments.get('x-expires'), 48 * 1000.0) + + def test_expires_is_float(self): + b = self.create_backend(expires=48.3) + self.assertEqual(b.queue_arguments.get('x-expires'), 48.3 * 1000.0) + + def test_expires_is_timedelta(self): + b = self.create_backend(expires=timedelta(minutes=1)) + self.assertEqual(b.queue_arguments.get('x-expires'), 60 * 1000.0) + + @sleepdeprived() + def test_store_result_retries(self): + iterations = [0] + stop_raising_at = [5] + + def publish(*args, **kwargs): + if iterations[0] > stop_raising_at[0]: + return + iterations[0] += 1 + raise KeyError('foo') + + backend = AMQPBackend(self.app) + from celery.app.amqp import TaskProducer + prod, TaskProducer.publish = TaskProducer.publish, publish + try: + with self.assertRaises(KeyError): + backend.retry_policy['max_retries'] = None + backend.store_result('foo', 'bar', 'STARTED') + + with self.assertRaises(KeyError): + backend.retry_policy['max_retries'] = 10 + backend.store_result('foo', 'bar', 'STARTED') + finally: + TaskProducer.publish = prod + + def assertState(self, retval, state): + self.assertEqual(retval['status'], state) + + def test_poll_no_messages(self): + b = self.create_backend() + self.assertState(b.get_task_meta(uuid()), states.PENDING) + + @contextmanager + def _result_context(self): + results = Queue() + + class Message(object): + acked = 0 + requeued = 0 + + def __init__(self, **merge): + self.payload = dict({'status': states.STARTED, + 'result': None}, **merge) + self.body = pickle.dumps(self.payload) + self.content_type = 'application/x-python-serialize' + self.content_encoding = 'binary' + + def ack(self, *args, **kwargs): + self.acked += 1 + + def requeue(self, *args, **kwargs): + self.requeued += 1 + + class MockBinding(object): + + def __init__(self, *args, **kwargs): + self.channel = Mock() + + def __call__(self, *args, **kwargs): + return self + + def declare(self): + pass + + def get(self, no_ack=False, accept=None): + try: + m = results.get(block=False) + if m: + m.accept = accept + return m + except Empty: + pass + + def is_bound(self): + return True + + class MockBackend(AMQPBackend): + Queue = MockBinding + + backend = MockBackend(self.app, 
max_cached_results=100) + backend._republish = Mock() + + yield results, backend, Message + + def test_backlog_limit_exceeded(self): + with self._result_context() as (results, backend, Message): + for i in range(1001): + results.put(Message(task_id='id', status=states.RECEIVED)) + with self.assertRaises(backend.BacklogLimitExceeded): + backend.get_task_meta('id') + + def test_poll_result(self): + with self._result_context() as (results, backend, Message): + tid = uuid() + # FFWD's to the latest state. + state_messages = [ + Message(task_id=tid, status=states.RECEIVED, seq=1), + Message(task_id=tid, status=states.STARTED, seq=2), + Message(task_id=tid, status=states.FAILURE, seq=3), + ] + for state_message in state_messages: + results.put(state_message) + r1 = backend.get_task_meta(tid) + self.assertDictContainsSubset( + {'status': states.FAILURE, 'seq': 3}, r1, + 'FFWDs to the last state', + ) + + # Caches last known state. + tid = uuid() + results.put(Message(task_id=tid)) + backend.get_task_meta(tid) + self.assertIn(tid, backend._cache, 'Caches last known state') + + self.assertTrue(state_messages[-1].requeued) + + # Returns cache if no new states. + results.queue.clear() + assert not results.qsize() + backend._cache[tid] = 'hello' + self.assertEqual( + backend.get_task_meta(tid), 'hello', + 'Returns cache if no new states', + ) + + def test_wait_for(self): + b = self.create_backend() + + tid = uuid() + with self.assertRaises(TimeoutError): + b.wait_for(tid, timeout=0.1) + b.store_result(tid, None, states.STARTED) + with self.assertRaises(TimeoutError): + b.wait_for(tid, timeout=0.1) + b.store_result(tid, None, states.RETRY) + with self.assertRaises(TimeoutError): + b.wait_for(tid, timeout=0.1) + b.store_result(tid, 42, states.SUCCESS) + self.assertEqual(b.wait_for(tid, timeout=1), 42) + b.store_result(tid, 56, states.SUCCESS) + self.assertEqual(b.wait_for(tid, timeout=1), 42, + 'result is cached') + self.assertEqual(b.wait_for(tid, timeout=1, cache=False), 56) + b.store_result(tid, KeyError('foo'), states.FAILURE) + with self.assertRaises(KeyError): + b.wait_for(tid, timeout=1, cache=False) + self.assertTrue(b.wait_for(tid, timeout=1, propagate=False)) + b.store_result(tid, KeyError('foo'), states.PENDING) + with self.assertRaises(TimeoutError): + b.wait_for(tid, timeout=0.01, cache=False) + + def test_drain_events_remaining_timeouts(self): + + class Connection(object): + + def drain_events(self, timeout=None): + pass + + b = self.create_backend() + with self.app.pool.acquire_channel(block=False) as (_, channel): + binding = b._create_binding(uuid()) + consumer = b.Consumer(channel, binding, no_ack=True) + with self.assertRaises(socket.timeout): + b.drain_events(Connection(), consumer, timeout=0.1) + + def test_get_many(self): + b = self.create_backend(max_cached_results=10) + + tids = [] + for i in range(10): + tid = uuid() + b.store_result(tid, i, states.SUCCESS) + tids.append(tid) + + res = list(b.get_many(tids, timeout=1)) + expected_results = [ + (task_id, { + 'status': states.SUCCESS, + 'result': i, + 'traceback': None, + 'task_id': task_id, + 'children': None, + }) + for i, task_id in enumerate(tids) + ] + self.assertEqual(sorted(res), sorted(expected_results)) + self.assertDictEqual(b._cache[res[0][0]], res[0][1]) + cached_res = list(b.get_many(tids, timeout=1)) + self.assertEqual(sorted(cached_res), sorted(expected_results)) + + # times out when not ready in cache (this shouldn't happen) + b._cache[res[0][0]]['status'] = states.RETRY + with self.assertRaises(socket.timeout): + 
list(b.get_many(tids, timeout=0.01)) + + # times out when result not yet ready + with self.assertRaises(socket.timeout): + tids = [uuid()] + b.store_result(tids[0], i, states.PENDING) + list(b.get_many(tids, timeout=0.01)) + + def test_get_many_raises_outer_block(self): + + class Backend(AMQPBackend): + + def Consumer(*args, **kwargs): + raise KeyError('foo') + + b = Backend(self.app) + with self.assertRaises(KeyError): + next(b.get_many(['id1'])) + + def test_get_many_raises_inner_block(self): + with patch('kombu.connection.Connection.drain_events') as drain: + drain.side_effect = KeyError('foo') + b = AMQPBackend(self.app) + with self.assertRaises(KeyError): + next(b.get_many(['id1'])) + + def test_consume_raises_inner_block(self): + with patch('kombu.connection.Connection.drain_events') as drain: + + def se(*args, **kwargs): + drain.side_effect = ValueError() + raise KeyError('foo') + drain.side_effect = se + b = AMQPBackend(self.app) + with self.assertRaises(ValueError): + next(b.consume('id1')) + + def test_no_expires(self): + b = self.create_backend(expires=None) + app = self.app + app.conf.CELERY_TASK_RESULT_EXPIRES = None + b = self.create_backend(expires=None) + with self.assertRaises(KeyError): + b.queue_arguments['x-expires'] + + def test_process_cleanup(self): + self.create_backend().process_cleanup() + + def test_reload_task_result(self): + with self.assertRaises(NotImplementedError): + self.create_backend().reload_task_result('x') + + def test_reload_group_result(self): + with self.assertRaises(NotImplementedError): + self.create_backend().reload_group_result('x') + + def test_save_group(self): + with self.assertRaises(NotImplementedError): + self.create_backend().save_group('x', 'x') + + def test_restore_group(self): + with self.assertRaises(NotImplementedError): + self.create_backend().restore_group('x') + + def test_delete_group(self): + with self.assertRaises(NotImplementedError): + self.create_backend().delete_group('x') diff --git a/celery/tests/backends/test_backends.py b/celery/tests/backends/test_backends.py new file mode 100644 index 0000000..c6a936b --- /dev/null +++ b/celery/tests/backends/test_backends.py @@ -0,0 +1,40 @@ +from __future__ import absolute_import + +from celery import backends +from celery.backends.amqp import AMQPBackend +from celery.backends.cache import CacheBackend +from celery.tests.case import AppCase, depends_on_current_app, patch + + +class test_backends(AppCase): + + def test_get_backend_aliases(self): + expects = [('amqp://', AMQPBackend), + ('cache+memory://', CacheBackend)] + + for url, expect_cls in expects: + backend, url = backends.get_backend_by_url(url, self.app.loader) + self.assertIsInstance( + backend(app=self.app, url=url), + expect_cls, + ) + + def test_unknown_backend(self): + with self.assertRaises(ImportError): + backends.get_backend_cls('fasodaopjeqijwqe', self.app.loader) + + @depends_on_current_app + def test_default_backend(self): + self.assertEqual(backends.default_backend, self.app.backend) + + def test_backend_by_url(self, url='redis://localhost/1'): + from celery.backends.redis import RedisBackend + backend, url_ = backends.get_backend_by_url(url, self.app.loader) + self.assertIs(backend, RedisBackend) + self.assertEqual(url_, url) + + def test_sym_raises_ValuError(self): + with patch('celery.backends.symbol_by_name') as sbn: + sbn.side_effect = ValueError() + with self.assertRaises(ValueError): + backends.get_backend_cls('xxx.xxx:foo', self.app.loader) diff --git a/celery/tests/backends/test_base.py 
b/celery/tests/backends/test_base.py new file mode 100644 index 0000000..58e3e8d --- /dev/null +++ b/celery/tests/backends/test_base.py @@ -0,0 +1,448 @@ +from __future__ import absolute_import + +import sys +import types + +from contextlib import contextmanager + +from celery.exceptions import ChordError +from celery.five import items, range +from celery.utils import serialization +from celery.utils.serialization import subclass_exception +from celery.utils.serialization import find_pickleable_exception as fnpe +from celery.utils.serialization import UnpickleableExceptionWrapper +from celery.utils.serialization import get_pickleable_exception as gpe + +from celery import states +from celery import group +from celery.backends.base import ( + BaseBackend, + KeyValueStoreBackend, + DisabledBackend, +) +from celery.result import result_from_tuple +from celery.utils import uuid + +from celery.tests.case import AppCase, Mock, SkipTest, patch + + +class wrapobject(object): + + def __init__(self, *args, **kwargs): + self.args = args + +if sys.version_info[0] == 3 or getattr(sys, 'pypy_version_info', None): + Oldstyle = None +else: + Oldstyle = types.ClassType('Oldstyle', (), {}) +Unpickleable = subclass_exception('Unpickleable', KeyError, 'foo.module') +Impossible = subclass_exception('Impossible', object, 'foo.module') +Lookalike = subclass_exception('Lookalike', wrapobject, 'foo.module') + + +class test_serialization(AppCase): + + def test_create_exception_cls(self): + self.assertTrue(serialization.create_exception_cls('FooError', 'm')) + self.assertTrue(serialization.create_exception_cls('FooError', 'm', + KeyError)) + + +class test_BaseBackend_interface(AppCase): + + def setup(self): + self.b = BaseBackend(self.app) + + def test__forget(self): + with self.assertRaises(NotImplementedError): + self.b._forget('SOMExx-N0Nex1stant-IDxx-') + + def test_forget(self): + with self.assertRaises(NotImplementedError): + self.b.forget('SOMExx-N0nex1stant-IDxx-') + + def test_on_chord_part_return(self): + self.b.on_chord_part_return(None, None, None) + + def test_apply_chord(self, unlock='celery.chord_unlock'): + self.app.tasks[unlock] = Mock() + self.b.apply_chord( + group(app=self.app), (), 'dakj221', None, + result=[self.app.AsyncResult(x) for x in [1, 2, 3]], + ) + self.assertTrue(self.app.tasks[unlock].apply_async.call_count) + + +class test_exception_pickle(AppCase): + + def test_oldstyle(self): + if Oldstyle is None: + raise SkipTest('py3k does not support old style classes') + self.assertTrue(fnpe(Oldstyle())) + + def test_BaseException(self): + self.assertIsNone(fnpe(Exception())) + + def test_get_pickleable_exception(self): + exc = Exception('foo') + self.assertEqual(gpe(exc), exc) + + def test_unpickleable(self): + self.assertIsInstance(fnpe(Unpickleable()), KeyError) + self.assertIsNone(fnpe(Impossible())) + + +class test_prepare_exception(AppCase): + + def setup(self): + self.b = BaseBackend(self.app) + + def test_unpickleable(self): + x = self.b.prepare_exception(Unpickleable(1, 2, 'foo')) + self.assertIsInstance(x, KeyError) + y = self.b.exception_to_python(x) + self.assertIsInstance(y, KeyError) + + def test_impossible(self): + x = self.b.prepare_exception(Impossible()) + self.assertIsInstance(x, UnpickleableExceptionWrapper) + self.assertTrue(str(x)) + y = self.b.exception_to_python(x) + self.assertEqual(y.__class__.__name__, 'Impossible') + if sys.version_info < (2, 5): + self.assertTrue(y.__class__.__module__) + else: + self.assertEqual(y.__class__.__module__, 'foo.module') + + def 
test_regular(self): + x = self.b.prepare_exception(KeyError('baz')) + self.assertIsInstance(x, KeyError) + y = self.b.exception_to_python(x) + self.assertIsInstance(y, KeyError) + + +class KVBackend(KeyValueStoreBackend): + mget_returns_dict = False + + def __init__(self, app, *args, **kwargs): + self.db = {} + super(KVBackend, self).__init__(app) + + def get(self, key): + return self.db.get(key) + + def set(self, key, value): + self.db[key] = value + + def mget(self, keys): + if self.mget_returns_dict: + return dict((key, self.get(key)) for key in keys) + else: + return [self.get(k) for k in keys] + + def delete(self, key): + self.db.pop(key, None) + + +class DictBackend(BaseBackend): + + def __init__(self, *args, **kwargs): + BaseBackend.__init__(self, *args, **kwargs) + self._data = {'can-delete': {'result': 'foo'}} + + def _restore_group(self, group_id): + if group_id == 'exists': + return {'result': 'group'} + + def _get_task_meta_for(self, task_id): + if task_id == 'task-exists': + return {'result': 'task'} + + def _delete_group(self, group_id): + self._data.pop(group_id, None) + + +class test_BaseBackend_dict(AppCase): + + def setup(self): + self.b = DictBackend(app=self.app) + + def test_delete_group(self): + self.b.delete_group('can-delete') + self.assertNotIn('can-delete', self.b._data) + + def test_prepare_exception_json(self): + x = DictBackend(self.app, serializer='json') + e = x.prepare_exception(KeyError('foo')) + self.assertIn('exc_type', e) + e = x.exception_to_python(e) + self.assertEqual(e.__class__.__name__, 'KeyError') + self.assertEqual(str(e), "'foo'") + + def test_save_group(self): + b = BaseBackend(self.app) + b._save_group = Mock() + b.save_group('foofoo', 'xxx') + b._save_group.assert_called_with('foofoo', 'xxx') + + def test_forget_interface(self): + b = BaseBackend(self.app) + with self.assertRaises(NotImplementedError): + b.forget('foo') + + def test_restore_group(self): + self.assertIsNone(self.b.restore_group('missing')) + self.assertIsNone(self.b.restore_group('missing')) + self.assertEqual(self.b.restore_group('exists'), 'group') + self.assertEqual(self.b.restore_group('exists'), 'group') + self.assertEqual(self.b.restore_group('exists', cache=False), 'group') + + def test_reload_group_result(self): + self.b._cache = {} + self.b.reload_group_result('exists') + self.b._cache['exists'] = {'result': 'group'} + + def test_reload_task_result(self): + self.b._cache = {} + self.b.reload_task_result('task-exists') + self.b._cache['task-exists'] = {'result': 'task'} + + def test_fail_from_current_stack(self): + self.b.mark_as_failure = Mock() + try: + raise KeyError('foo') + except KeyError as exc: + self.b.fail_from_current_stack('task_id') + self.assertTrue(self.b.mark_as_failure.called) + args = self.b.mark_as_failure.call_args[0] + self.assertEqual(args[0], 'task_id') + self.assertIs(args[1], exc) + self.assertTrue(args[2]) + + def test_prepare_value_serializes_group_result(self): + self.b.serializer = 'json' + g = self.app.GroupResult('group_id', [self.app.AsyncResult('foo')]) + v = self.b.prepare_value(g) + self.assertIsInstance(v, (list, tuple)) + self.assertEqual(result_from_tuple(v, app=self.app), g) + + v2 = self.b.prepare_value(g[0]) + self.assertIsInstance(v2, (list, tuple)) + self.assertEqual(result_from_tuple(v2, app=self.app), g[0]) + + self.b.serializer = 'pickle' + self.assertIsInstance(self.b.prepare_value(g), self.app.GroupResult) + + def test_is_cached(self): + b = BaseBackend(app=self.app, max_cached_results=1) + b._cache['foo'] = 1 + 
self.assertTrue(b.is_cached('foo')) + self.assertFalse(b.is_cached('false')) + + +class test_KeyValueStoreBackend(AppCase): + + def setup(self): + self.b = KVBackend(app=self.app) + + def test_on_chord_part_return(self): + assert not self.b.implements_incr + self.b.on_chord_part_return(None, None, None) + + def test_get_store_delete_result(self): + tid = uuid() + self.b.mark_as_done(tid, 'Hello world') + self.assertEqual(self.b.get_result(tid), 'Hello world') + self.assertEqual(self.b.get_status(tid), states.SUCCESS) + self.b.forget(tid) + self.assertEqual(self.b.get_status(tid), states.PENDING) + + def test_strip_prefix(self): + x = self.b.get_key_for_task('x1b34') + self.assertEqual(self.b._strip_prefix(x), 'x1b34') + self.assertEqual(self.b._strip_prefix('x1b34'), 'x1b34') + + def test_get_many(self): + for is_dict in True, False: + self.b.mget_returns_dict = is_dict + ids = dict((uuid(), i) for i in range(10)) + for id, i in items(ids): + self.b.mark_as_done(id, i) + it = self.b.get_many(list(ids)) + for i, (got_id, got_state) in enumerate(it): + self.assertEqual(got_state['result'], ids[got_id]) + self.assertEqual(i, 9) + self.assertTrue(list(self.b.get_many(list(ids)))) + + def test_get_many_times_out(self): + tasks = [uuid() for _ in range(4)] + self.b._cache[tasks[1]] = {'status': 'PENDING'} + with self.assertRaises(self.b.TimeoutError): + list(self.b.get_many(tasks, timeout=0.01, interval=0.01)) + + def test_chord_part_return_no_gid(self): + self.b.implements_incr = True + task = Mock() + state = 'SUCCESS' + result = 10 + task.request.group = None + self.b.get_key_for_chord = Mock() + self.b.get_key_for_chord.side_effect = AssertionError( + 'should not get here', + ) + self.assertIsNone(self.b.on_chord_part_return(task, state, result)) + + @contextmanager + def _chord_part_context(self, b): + + @self.app.task(shared=False) + def callback(result): + pass + + b.implements_incr = True + b.client = Mock() + with patch('celery.backends.base.GroupResult') as GR: + deps = GR.restore.return_value = Mock(name='DEPS') + deps.__len__ = Mock() + deps.__len__.return_value = 10 + b.incr = Mock() + b.incr.return_value = 10 + b.expire = Mock() + task = Mock() + task.request.group = 'grid' + cb = task.request.chord = callback.s() + task.request.chord.freeze() + callback.backend = b + callback.backend.fail_from_current_stack = Mock() + yield task, deps, cb + + def test_chord_part_return_propagate_set(self): + with self._chord_part_context(self.b) as (task, deps, _): + self.b.on_chord_part_return(task, 'SUCCESS', 10, propagate=True) + self.assertFalse(self.b.expire.called) + deps.delete.assert_called_with() + deps.join_native.assert_called_with(propagate=True, timeout=3.0) + + def test_chord_part_return_propagate_default(self): + with self._chord_part_context(self.b) as (task, deps, _): + self.b.on_chord_part_return(task, 'SUCCESS', 10, propagate=None) + self.assertFalse(self.b.expire.called) + deps.delete.assert_called_with() + deps.join_native.assert_called_with( + propagate=self.b.app.conf.CELERY_CHORD_PROPAGATES, + timeout=3.0, + ) + + def test_chord_part_return_join_raises_internal(self): + with self._chord_part_context(self.b) as (task, deps, callback): + deps._failed_join_report = lambda: iter([]) + deps.join_native.side_effect = KeyError('foo') + self.b.on_chord_part_return(task, 'SUCCESS', 10) + self.assertTrue(self.b.fail_from_current_stack.called) + args = self.b.fail_from_current_stack.call_args + exc = args[1]['exc'] + self.assertIsInstance(exc, ChordError) + self.assertIn('foo', 
str(exc)) + + def test_chord_part_return_join_raises_task(self): + b = KVBackend(serializer='pickle', app=self.app) + with self._chord_part_context(b) as (task, deps, callback): + deps._failed_join_report = lambda: iter([ + self.app.AsyncResult('culprit'), + ]) + deps.join_native.side_effect = KeyError('foo') + b.on_chord_part_return(task, 'SUCCESS', 10) + self.assertTrue(b.fail_from_current_stack.called) + args = b.fail_from_current_stack.call_args + exc = args[1]['exc'] + self.assertIsInstance(exc, ChordError) + self.assertIn('Dependency culprit raised', str(exc)) + + def test_restore_group_from_json(self): + b = KVBackend(serializer='json', app=self.app) + g = self.app.GroupResult( + 'group_id', + [self.app.AsyncResult('a'), self.app.AsyncResult('b')], + ) + b._save_group(g.id, g) + g2 = b._restore_group(g.id)['result'] + self.assertEqual(g2, g) + + def test_restore_group_from_pickle(self): + b = KVBackend(serializer='pickle', app=self.app) + g = self.app.GroupResult( + 'group_id', + [self.app.AsyncResult('a'), self.app.AsyncResult('b')], + ) + b._save_group(g.id, g) + g2 = b._restore_group(g.id)['result'] + self.assertEqual(g2, g) + + def test_chord_apply_fallback(self): + self.b.implements_incr = False + self.b.fallback_chord_unlock = Mock() + self.b.apply_chord( + group(app=self.app), (), 'group_id', 'body', + result='result', foo=1, + ) + self.b.fallback_chord_unlock.assert_called_with( + 'group_id', 'body', result='result', foo=1, + ) + + def test_get_missing_meta(self): + self.assertIsNone(self.b.get_result('xxx-missing')) + self.assertEqual(self.b.get_status('xxx-missing'), states.PENDING) + + def test_save_restore_delete_group(self): + tid = uuid() + tsr = self.app.GroupResult( + tid, [self.app.AsyncResult(uuid()) for _ in range(10)], + ) + self.b.save_group(tid, tsr) + self.b.restore_group(tid) + self.assertEqual(self.b.restore_group(tid), tsr) + self.b.delete_group(tid) + self.assertIsNone(self.b.restore_group(tid)) + + def test_restore_missing_group(self): + self.assertIsNone(self.b.restore_group('xxx-nonexistant')) + + +class test_KeyValueStoreBackend_interface(AppCase): + + def test_get(self): + with self.assertRaises(NotImplementedError): + KeyValueStoreBackend(self.app).get('a') + + def test_set(self): + with self.assertRaises(NotImplementedError): + KeyValueStoreBackend(self.app).set('a', 1) + + def test_incr(self): + with self.assertRaises(NotImplementedError): + KeyValueStoreBackend(self.app).incr('a') + + def test_cleanup(self): + self.assertFalse(KeyValueStoreBackend(self.app).cleanup()) + + def test_delete(self): + with self.assertRaises(NotImplementedError): + KeyValueStoreBackend(self.app).delete('a') + + def test_mget(self): + with self.assertRaises(NotImplementedError): + KeyValueStoreBackend(self.app).mget(['a']) + + def test_forget(self): + with self.assertRaises(NotImplementedError): + KeyValueStoreBackend(self.app).forget('a') + + +class test_DisabledBackend(AppCase): + + def test_store_result(self): + DisabledBackend(self.app).store_result() + + def test_is_disabled(self): + with self.assertRaises(NotImplementedError): + DisabledBackend(self.app).get_status('foo') diff --git a/celery/tests/backends/test_cache.py b/celery/tests/backends/test_cache.py new file mode 100644 index 0000000..051760a --- /dev/null +++ b/celery/tests/backends/test_cache.py @@ -0,0 +1,253 @@ +from __future__ import absolute_import + +import sys +import types + +from contextlib import contextmanager + +from kombu.utils.encoding import str_to_bytes + +from celery import signature +from 
celery import states +from celery import group +from celery.backends.cache import CacheBackend, DummyClient +from celery.exceptions import ImproperlyConfigured +from celery.five import items, string, text_t +from celery.utils import uuid + +from celery.tests.case import ( + AppCase, Mock, mask_modules, patch, reset_modules, +) + +PY3 = sys.version_info[0] == 3 + + +class SomeClass(object): + + def __init__(self, data): + self.data = data + + +class test_CacheBackend(AppCase): + + def setup(self): + self.tb = CacheBackend(backend='memory://', app=self.app) + self.tid = uuid() + + def test_no_backend(self): + self.app.conf.CELERY_CACHE_BACKEND = None + with self.assertRaises(ImproperlyConfigured): + CacheBackend(backend=None, app=self.app) + + def test_mark_as_done(self): + self.assertEqual(self.tb.get_status(self.tid), states.PENDING) + self.assertIsNone(self.tb.get_result(self.tid)) + + self.tb.mark_as_done(self.tid, 42) + self.assertEqual(self.tb.get_status(self.tid), states.SUCCESS) + self.assertEqual(self.tb.get_result(self.tid), 42) + + def test_is_pickled(self): + result = {'foo': 'baz', 'bar': SomeClass(12345)} + self.tb.mark_as_done(self.tid, result) + # is serialized properly. + rindb = self.tb.get_result(self.tid) + self.assertEqual(rindb.get('foo'), 'baz') + self.assertEqual(rindb.get('bar').data, 12345) + + def test_mark_as_failure(self): + try: + raise KeyError('foo') + except KeyError as exception: + self.tb.mark_as_failure(self.tid, exception) + self.assertEqual(self.tb.get_status(self.tid), states.FAILURE) + self.assertIsInstance(self.tb.get_result(self.tid), KeyError) + + def test_apply_chord(self): + tb = CacheBackend(backend='memory://', app=self.app) + gid, res = uuid(), [self.app.AsyncResult(uuid()) for _ in range(3)] + tb.apply_chord(group(app=self.app), (), gid, {}, result=res) + + @patch('celery.result.GroupResult.restore') + def test_on_chord_part_return(self, restore): + tb = CacheBackend(backend='memory://', app=self.app) + + deps = Mock() + deps.__len__ = Mock() + deps.__len__.return_value = 2 + restore.return_value = deps + task = Mock() + task.name = 'foobarbaz' + self.app.tasks['foobarbaz'] = task + task.request.chord = signature(task) + + gid, res = uuid(), [self.app.AsyncResult(uuid()) for _ in range(3)] + task.request.group = gid + tb.apply_chord(group(app=self.app), (), gid, {}, result=res) + + self.assertFalse(deps.join_native.called) + tb.on_chord_part_return(task, 'SUCCESS', 10) + self.assertFalse(deps.join_native.called) + + tb.on_chord_part_return(task, 'SUCCESS', 10) + deps.join_native.assert_called_with(propagate=True, timeout=3.0) + deps.delete.assert_called_with() + + def test_mget(self): + self.tb.set('foo', 1) + self.tb.set('bar', 2) + + self.assertDictEqual(self.tb.mget(['foo', 'bar']), + {'foo': 1, 'bar': 2}) + + def test_forget(self): + self.tb.mark_as_done(self.tid, {'foo': 'bar'}) + x = self.app.AsyncResult(self.tid, backend=self.tb) + x.forget() + self.assertIsNone(x.result) + + def test_process_cleanup(self): + self.tb.process_cleanup() + + def test_expires_as_int(self): + tb = CacheBackend(backend='memory://', expires=10, app=self.app) + self.assertEqual(tb.expires, 10) + + def test_unknown_backend_raises_ImproperlyConfigured(self): + with self.assertRaises(ImproperlyConfigured): + CacheBackend(backend='unknown://', app=self.app) + + +class MyMemcachedStringEncodingError(Exception): + pass + + +class MemcachedClient(DummyClient): + + def set(self, key, value, *args, **kwargs): + if PY3: + key_t, must_be, not_be, cod = bytes, 'string', 
'bytes', 'decode' + else: + key_t, must_be, not_be, cod = text_t, 'bytes', 'string', 'encode' + if isinstance(key, key_t): + raise MyMemcachedStringEncodingError( + 'Keys must be {0}, not {1}. Convert your ' + 'strings using mystring.{2}(charset)!'.format( + must_be, not_be, cod)) + return super(MemcachedClient, self).set(key, value, *args, **kwargs) + + +class MockCacheMixin(object): + + @contextmanager + def mock_memcache(self): + memcache = types.ModuleType('memcache') + memcache.Client = MemcachedClient + memcache.Client.__module__ = memcache.__name__ + prev, sys.modules['memcache'] = sys.modules.get('memcache'), memcache + try: + yield True + finally: + if prev is not None: + sys.modules['memcache'] = prev + + @contextmanager + def mock_pylibmc(self): + pylibmc = types.ModuleType('pylibmc') + pylibmc.Client = MemcachedClient + pylibmc.Client.__module__ = pylibmc.__name__ + prev = sys.modules.get('pylibmc') + sys.modules['pylibmc'] = pylibmc + try: + yield True + finally: + if prev is not None: + sys.modules['pylibmc'] = prev + + +class test_get_best_memcache(AppCase, MockCacheMixin): + + def test_pylibmc(self): + with self.mock_pylibmc(): + with reset_modules('celery.backends.cache'): + from celery.backends import cache + cache._imp = [None] + self.assertEqual(cache.get_best_memcache()[0].__module__, + 'pylibmc') + + def test_memcache(self): + with self.mock_memcache(): + with reset_modules('celery.backends.cache'): + with mask_modules('pylibmc'): + from celery.backends import cache + cache._imp = [None] + self.assertEqual(cache.get_best_memcache()[0]().__module__, + 'memcache') + + def test_no_implementations(self): + with mask_modules('pylibmc', 'memcache'): + with reset_modules('celery.backends.cache'): + from celery.backends import cache + cache._imp = [None] + with self.assertRaises(ImproperlyConfigured): + cache.get_best_memcache() + + def test_cached(self): + with self.mock_pylibmc(): + with reset_modules('celery.backends.cache'): + from celery.backends import cache + cache._imp = [None] + cache.get_best_memcache()[0](behaviors={'foo': 'bar'}) + self.assertTrue(cache._imp[0]) + cache.get_best_memcache()[0]() + + def test_backends(self): + from celery.backends.cache import backends + with self.mock_memcache(): + for name, fun in items(backends): + self.assertTrue(fun()) + + +class test_memcache_key(AppCase, MockCacheMixin): + + def test_memcache_unicode_key(self): + with self.mock_memcache(): + with reset_modules('celery.backends.cache'): + with mask_modules('pylibmc'): + from celery.backends import cache + cache._imp = [None] + task_id, result = string(uuid()), 42 + b = cache.CacheBackend(backend='memcache', app=self.app) + b.store_result(task_id, result, status=states.SUCCESS) + self.assertEqual(b.get_result(task_id), result) + + def test_memcache_bytes_key(self): + with self.mock_memcache(): + with reset_modules('celery.backends.cache'): + with mask_modules('pylibmc'): + from celery.backends import cache + cache._imp = [None] + task_id, result = str_to_bytes(uuid()), 42 + b = cache.CacheBackend(backend='memcache', app=self.app) + b.store_result(task_id, result, status=states.SUCCESS) + self.assertEqual(b.get_result(task_id), result) + + def test_pylibmc_unicode_key(self): + with reset_modules('celery.backends.cache'): + with self.mock_pylibmc(): + from celery.backends import cache + cache._imp = [None] + task_id, result = string(uuid()), 42 + b = cache.CacheBackend(backend='memcache', app=self.app) + b.store_result(task_id, result, status=states.SUCCESS) + 
self.assertEqual(b.get_result(task_id), result) + + def test_pylibmc_bytes_key(self): + with reset_modules('celery.backends.cache'): + with self.mock_pylibmc(): + from celery.backends import cache + cache._imp = [None] + task_id, result = str_to_bytes(uuid()), 42 + b = cache.CacheBackend(backend='memcache', app=self.app) + b.store_result(task_id, result, status=states.SUCCESS) + self.assertEqual(b.get_result(task_id), result) diff --git a/celery/tests/backends/test_cassandra.py b/celery/tests/backends/test_cassandra.py new file mode 100644 index 0000000..1a43be9 --- /dev/null +++ b/celery/tests/backends/test_cassandra.py @@ -0,0 +1,190 @@ +from __future__ import absolute_import + +import socket + +from pickle import loads, dumps + +from celery import states +from celery.exceptions import ImproperlyConfigured +from celery.tests.case import ( + AppCase, Mock, mock_module, depends_on_current_app, +) + + +class Object(object): + pass + + +def install_exceptions(mod): + # py3k: cannot catch exceptions not inheriting from BaseException. + + class NotFoundException(Exception): + pass + + class TException(Exception): + pass + + class InvalidRequestException(Exception): + pass + + class UnavailableException(Exception): + pass + + class TimedOutException(Exception): + pass + + class AllServersUnavailable(Exception): + pass + + mod.NotFoundException = NotFoundException + mod.TException = TException + mod.InvalidRequestException = InvalidRequestException + mod.TimedOutException = TimedOutException + mod.UnavailableException = UnavailableException + mod.AllServersUnavailable = AllServersUnavailable + + +class test_CassandraBackend(AppCase): + + def setup(self): + self.app.conf.update( + CASSANDRA_SERVERS=['example.com'], + CASSANDRA_KEYSPACE='keyspace', + CASSANDRA_COLUMN_FAMILY='columns', + ) + + def test_init_no_pycassa(self): + with mock_module('pycassa'): + from celery.backends import cassandra as mod + prev, mod.pycassa = mod.pycassa, None + try: + with self.assertRaises(ImproperlyConfigured): + mod.CassandraBackend(app=self.app) + finally: + mod.pycassa = prev + + def test_init_with_and_without_LOCAL_QUORUM(self): + with mock_module('pycassa'): + from celery.backends import cassandra as mod + mod.pycassa = Mock() + install_exceptions(mod.pycassa) + cons = mod.pycassa.ConsistencyLevel = Object() + cons.LOCAL_QUORUM = 'foo' + + self.app.conf.CASSANDRA_READ_CONSISTENCY = 'LOCAL_FOO' + self.app.conf.CASSANDRA_WRITE_CONSISTENCY = 'LOCAL_FOO' + + mod.CassandraBackend(app=self.app) + cons.LOCAL_FOO = 'bar' + mod.CassandraBackend(app=self.app) + + # no servers raises ImproperlyConfigured + with self.assertRaises(ImproperlyConfigured): + self.app.conf.CASSANDRA_SERVERS = None + mod.CassandraBackend( + app=self.app, keyspace='b', column_family='c', + ) + + @depends_on_current_app + def test_reduce(self): + with mock_module('pycassa'): + from celery.backends.cassandra import CassandraBackend + self.assertTrue(loads(dumps(CassandraBackend(app=self.app)))) + + def test_get_task_meta_for(self): + with mock_module('pycassa'): + from celery.backends import cassandra as mod + mod.pycassa = Mock() + install_exceptions(mod.pycassa) + mod.Thrift = Mock() + install_exceptions(mod.Thrift) + x = mod.CassandraBackend(app=self.app) + Get_Column = x._get_column_family = Mock() + get_column = Get_Column.return_value = Mock() + get = get_column.get + META = get.return_value = { + 'task_id': 'task_id', + 'status': states.SUCCESS, + 'result': '1', + 'date_done': 'date', + 'traceback': '', + 'children': None, + } + x.decode = 
Mock() + x.detailed_mode = False + meta = x._get_task_meta_for('task_id') + self.assertEqual(meta['status'], states.SUCCESS) + + x.detailed_mode = True + row = get.return_value = Mock() + row.values.return_value = [Mock()] + x.decode.return_value = META + meta = x._get_task_meta_for('task_id') + self.assertEqual(meta['status'], states.SUCCESS) + x.decode.return_value = Mock() + + x.detailed_mode = False + get.side_effect = KeyError() + meta = x._get_task_meta_for('task_id') + self.assertEqual(meta['status'], states.PENDING) + + calls = [0] + end = [10] + + def work_eventually(*arg): + try: + if calls[0] > end[0]: + return META + raise socket.error() + finally: + calls[0] += 1 + get.side_effect = work_eventually + x._retry_timeout = 10 + x._retry_wait = 0.01 + meta = x._get_task_meta_for('task') + self.assertEqual(meta['status'], states.SUCCESS) + + x._retry_timeout = 0.1 + calls[0], end[0] = 0, 100 + with self.assertRaises(socket.error): + x._get_task_meta_for('task') + + def test_store_result(self): + with mock_module('pycassa'): + from celery.backends import cassandra as mod + mod.pycassa = Mock() + install_exceptions(mod.pycassa) + mod.Thrift = Mock() + install_exceptions(mod.Thrift) + x = mod.CassandraBackend(app=self.app) + Get_Column = x._get_column_family = Mock() + cf = Get_Column.return_value = Mock() + x.detailed_mode = False + x._store_result('task_id', 'result', states.SUCCESS) + self.assertTrue(cf.insert.called) + + cf.insert.reset() + x.detailed_mode = True + x._store_result('task_id', 'result', states.SUCCESS) + self.assertTrue(cf.insert.called) + + def test_process_cleanup(self): + with mock_module('pycassa'): + from celery.backends import cassandra as mod + x = mod.CassandraBackend(app=self.app) + x._column_family = None + x.process_cleanup() + + x._column_family = True + x.process_cleanup() + self.assertIsNone(x._column_family) + + def test_get_column_family(self): + with mock_module('pycassa'): + from celery.backends import cassandra as mod + mod.pycassa = Mock() + install_exceptions(mod.pycassa) + x = mod.CassandraBackend(app=self.app) + self.assertTrue(x._get_column_family()) + self.assertIsNotNone(x._column_family) + self.assertIs(x._get_column_family(), x._column_family) diff --git a/celery/tests/backends/test_couchbase.py b/celery/tests/backends/test_couchbase.py new file mode 100644 index 0000000..3dc6aad --- /dev/null +++ b/celery/tests/backends/test_couchbase.py @@ -0,0 +1,136 @@ +from __future__ import absolute_import + +from celery.backends import couchbase as module +from celery.backends.couchbase import CouchBaseBackend +from celery.exceptions import ImproperlyConfigured +from celery import backends +from celery.tests.case import ( + AppCase, MagicMock, Mock, SkipTest, patch, sentinel, +) + +try: + import couchbase +except ImportError: + couchbase = None # noqa + +COUCHBASE_BUCKET = 'celery_bucket' + + +class test_CouchBaseBackend(AppCase): + + def setup(self): + if couchbase is None: + raise SkipTest('couchbase is not installed.') + self.backend = CouchBaseBackend(app=self.app) + + def test_init_no_couchbase(self): + """test init no couchbase raises""" + prev, module.couchbase = module.couchbase, None + try: + with self.assertRaises(ImproperlyConfigured): + CouchBaseBackend(app=self.app) + finally: + module.couchbase = prev + + def test_init_no_settings(self): + """test init no settings""" + self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = [] + with self.assertRaises(ImproperlyConfigured): + CouchBaseBackend(app=self.app) + + def 
test_init_settings_is_None(self): + """Test init settings is None""" + self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = None + CouchBaseBackend(app=self.app) + + def test_get_connection_connection_exists(self): + with patch('couchbase.connection.Connection') as mock_Connection: + self.backend._connection = sentinel._connection + + connection = self.backend._get_connection() + + self.assertEqual(sentinel._connection, connection) + self.assertFalse(mock_Connection.called) + + def test_get(self): + """test_get + + CouchBaseBackend.get should return and take two params + db conn to couchbase is mocked. + TODO Should test on key not exists + + """ + self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = {} + x = CouchBaseBackend(app=self.app) + x._connection = Mock() + mocked_get = x._connection.get = Mock() + mocked_get.return_value.value = sentinel.retval + # should return None + self.assertEqual(x.get('1f3fab'), sentinel.retval) + x._connection.get.assert_called_once_with('1f3fab') + + def test_set(self): + """test_set + + CouchBaseBackend.set should return None and take two params + db conn to couchbase is mocked. + + """ + self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = None + x = CouchBaseBackend(app=self.app) + x._connection = MagicMock() + x._connection.set = MagicMock() + # should return None + self.assertIsNone(x.set(sentinel.key, sentinel.value)) + + def test_delete(self): + """test_delete + + CouchBaseBackend.delete should return and take two params + db conn to couchbase is mocked. + TODO Should test on key not exists + + """ + self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = {} + x = CouchBaseBackend(app=self.app) + x._connection = Mock() + mocked_delete = x._connection.delete = Mock() + mocked_delete.return_value = None + # should return None + self.assertIsNone(x.delete('1f3fab')) + x._connection.delete.assert_called_once_with('1f3fab') + + def test_config_params(self): + """test_config_params + + celery.conf.CELERY_COUCHBASE_BACKEND_SETTINGS is properly set + """ + self.app.conf.CELERY_COUCHBASE_BACKEND_SETTINGS = { + 'bucket': 'mycoolbucket', + 'host': ['here.host.com', 'there.host.com'], + 'username': 'johndoe', + 'password': 'mysecret', + 'port': '1234', + } + x = CouchBaseBackend(app=self.app) + self.assertEqual(x.bucket, 'mycoolbucket') + self.assertEqual(x.host, ['here.host.com', 'there.host.com'],) + self.assertEqual(x.username, 'johndoe',) + self.assertEqual(x.password, 'mysecret') + self.assertEqual(x.port, 1234) + + def test_backend_by_url(self, url='couchbase://myhost/mycoolbucket'): + from celery.backends.couchbase import CouchBaseBackend + backend, url_ = backends.get_backend_by_url(url, self.app.loader) + self.assertIs(backend, CouchBaseBackend) + self.assertEqual(url_, url) + + def test_backend_params_by_url(self): + url = 'couchbase://johndoe:mysecret@myhost:123/mycoolbucket' + with self.Celery(backend=url) as app: + x = app.backend + self.assertEqual(x.bucket, 'mycoolbucket') + self.assertEqual(x.host, 'myhost') + self.assertEqual(x.username, 'johndoe') + self.assertEqual(x.password, 'mysecret') + self.assertEqual(x.port, 123) diff --git a/celery/tests/backends/test_database.py b/celery/tests/backends/test_database.py new file mode 100644 index 0000000..6b5bf94 --- /dev/null +++ b/celery/tests/backends/test_database.py @@ -0,0 +1,196 @@ +from __future__ import absolute_import, unicode_literals + +from datetime import datetime + +from pickle import loads, dumps + +from celery import states +from celery.exceptions import ImproperlyConfigured +from celery.utils 
import uuid + +from celery.tests.case import ( + AppCase, + SkipTest, + depends_on_current_app, + mask_modules, + skip_if_pypy, + skip_if_jython, +) + +try: + import sqlalchemy # noqa +except ImportError: + DatabaseBackend = Task = TaskSet = retry = None # noqa +else: + from celery.backends.database import DatabaseBackend, retry + from celery.backends.database.models import Task, TaskSet + + +class SomeClass(object): + + def __init__(self, data): + self.data = data + + +class test_DatabaseBackend(AppCase): + + @skip_if_pypy + @skip_if_jython + def setup(self): + if DatabaseBackend is None: + raise SkipTest('sqlalchemy not installed') + self.uri = 'sqlite:///test.db' + + def test_retry_helper(self): + from celery.backends.database import DatabaseError + + calls = [0] + + @retry + def raises(): + calls[0] += 1 + raise DatabaseError(1, 2, 3) + + with self.assertRaises(DatabaseError): + raises(max_retries=5) + self.assertEqual(calls[0], 5) + + def test_missing_SQLAlchemy_raises_ImproperlyConfigured(self): + with mask_modules('sqlalchemy'): + from celery.backends.database import _sqlalchemy_installed + with self.assertRaises(ImproperlyConfigured): + _sqlalchemy_installed() + + def test_missing_dburi_raises_ImproperlyConfigured(self): + self.app.conf.CELERY_RESULT_DBURI = None + with self.assertRaises(ImproperlyConfigured): + DatabaseBackend(app=self.app) + + def test_missing_task_id_is_PENDING(self): + tb = DatabaseBackend(self.uri, app=self.app) + self.assertEqual(tb.get_status('xxx-does-not-exist'), states.PENDING) + + def test_missing_task_meta_is_dict_with_pending(self): + tb = DatabaseBackend(self.uri, app=self.app) + self.assertDictContainsSubset({ + 'status': states.PENDING, + 'task_id': 'xxx-does-not-exist-at-all', + 'result': None, + 'traceback': None + }, tb.get_task_meta('xxx-does-not-exist-at-all')) + + def test_mark_as_done(self): + tb = DatabaseBackend(self.uri, app=self.app) + + tid = uuid() + + self.assertEqual(tb.get_status(tid), states.PENDING) + self.assertIsNone(tb.get_result(tid)) + + tb.mark_as_done(tid, 42) + self.assertEqual(tb.get_status(tid), states.SUCCESS) + self.assertEqual(tb.get_result(tid), 42) + + def test_is_pickled(self): + tb = DatabaseBackend(self.uri, app=self.app) + + tid2 = uuid() + result = {'foo': 'baz', 'bar': SomeClass(12345)} + tb.mark_as_done(tid2, result) + # is serialized properly. 
+ rindb = tb.get_result(tid2) + self.assertEqual(rindb.get('foo'), 'baz') + self.assertEqual(rindb.get('bar').data, 12345) + + def test_mark_as_started(self): + tb = DatabaseBackend(self.uri, app=self.app) + tid = uuid() + tb.mark_as_started(tid) + self.assertEqual(tb.get_status(tid), states.STARTED) + + def test_mark_as_revoked(self): + tb = DatabaseBackend(self.uri, app=self.app) + tid = uuid() + tb.mark_as_revoked(tid) + self.assertEqual(tb.get_status(tid), states.REVOKED) + + def test_mark_as_retry(self): + tb = DatabaseBackend(self.uri, app=self.app) + tid = uuid() + try: + raise KeyError('foo') + except KeyError as exception: + import traceback + trace = '\n'.join(traceback.format_stack()) + tb.mark_as_retry(tid, exception, traceback=trace) + self.assertEqual(tb.get_status(tid), states.RETRY) + self.assertIsInstance(tb.get_result(tid), KeyError) + self.assertEqual(tb.get_traceback(tid), trace) + + def test_mark_as_failure(self): + tb = DatabaseBackend(self.uri, app=self.app) + + tid3 = uuid() + try: + raise KeyError('foo') + except KeyError as exception: + import traceback + trace = '\n'.join(traceback.format_stack()) + tb.mark_as_failure(tid3, exception, traceback=trace) + self.assertEqual(tb.get_status(tid3), states.FAILURE) + self.assertIsInstance(tb.get_result(tid3), KeyError) + self.assertEqual(tb.get_traceback(tid3), trace) + + def test_forget(self): + tb = DatabaseBackend(self.uri, backend='memory://', app=self.app) + tid = uuid() + tb.mark_as_done(tid, {'foo': 'bar'}) + tb.mark_as_done(tid, {'foo': 'bar'}) + x = self.app.AsyncResult(tid, backend=tb) + x.forget() + self.assertIsNone(x.result) + + def test_process_cleanup(self): + tb = DatabaseBackend(self.uri, app=self.app) + tb.process_cleanup() + + @depends_on_current_app + def test_reduce(self): + tb = DatabaseBackend(self.uri, app=self.app) + self.assertTrue(loads(dumps(tb))) + + def test_save__restore__delete_group(self): + tb = DatabaseBackend(self.uri, app=self.app) + + tid = uuid() + res = {'something': 'special'} + self.assertEqual(tb.save_group(tid, res), res) + + res2 = tb.restore_group(tid) + self.assertEqual(res2, res) + + tb.delete_group(tid) + self.assertIsNone(tb.restore_group(tid)) + + self.assertIsNone(tb.restore_group('xxx-nonexisting-id')) + + def test_cleanup(self): + tb = DatabaseBackend(self.uri, app=self.app) + for i in range(10): + tb.mark_as_done(uuid(), 42) + tb.save_group(uuid(), {'foo': 'bar'}) + s = tb.ResultSession() + for t in s.query(Task).all(): + t.date_done = datetime.now() - tb.expires * 2 + for t in s.query(TaskSet).all(): + t.date_done = datetime.now() - tb.expires * 2 + s.commit() + s.close() + + tb.cleanup() + + def test_Task__repr__(self): + self.assertIn('foo', repr(Task('foo'))) + + def test_TaskSet__repr__(self): + self.assertIn('foo', repr(TaskSet('foo', None))) diff --git a/celery/tests/backends/test_mongodb.py b/celery/tests/backends/test_mongodb.py new file mode 100644 index 0000000..f3449f7 --- /dev/null +++ b/celery/tests/backends/test_mongodb.py @@ -0,0 +1,325 @@ +from __future__ import absolute_import + +import datetime +import uuid + +from pickle import loads, dumps + +from celery import states +from celery.backends import mongodb as module +from celery.backends.mongodb import MongoBackend, Bunch, pymongo +from celery.exceptions import ImproperlyConfigured +from celery.tests.case import ( + AppCase, MagicMock, Mock, SkipTest, ANY, + depends_on_current_app, patch, sentinel, +) + +COLLECTION = 'taskmeta_celery' +TASK_ID = str(uuid.uuid1()) +MONGODB_HOST = 'localhost' 
+MONGODB_PORT = 27017 +MONGODB_USER = 'mongo' +MONGODB_PASSWORD = '1234' +MONGODB_DATABASE = 'testing' +MONGODB_COLLECTION = 'collection1' + + +class test_MongoBackend(AppCase): + + def setup(self): + if pymongo is None: + raise SkipTest('pymongo is not installed.') + + R = self._reset = {} + R['encode'], MongoBackend.encode = MongoBackend.encode, Mock() + R['decode'], MongoBackend.decode = MongoBackend.decode, Mock() + R['Binary'], module.Binary = module.Binary, Mock() + R['datetime'], datetime.datetime = datetime.datetime, Mock() + + self.backend = MongoBackend(app=self.app) + + def teardown(self): + MongoBackend.encode = self._reset['encode'] + MongoBackend.decode = self._reset['decode'] + module.Binary = self._reset['Binary'] + datetime.datetime = self._reset['datetime'] + + def test_Bunch(self): + x = Bunch(foo='foo', bar=2) + self.assertEqual(x.foo, 'foo') + self.assertEqual(x.bar, 2) + + def test_init_no_mongodb(self): + prev, module.pymongo = module.pymongo, None + try: + with self.assertRaises(ImproperlyConfigured): + MongoBackend(app=self.app) + finally: + module.pymongo = prev + + def test_init_no_settings(self): + self.app.conf.CELERY_MONGODB_BACKEND_SETTINGS = [] + with self.assertRaises(ImproperlyConfigured): + MongoBackend(app=self.app) + + def test_init_settings_is_None(self): + self.app.conf.CELERY_MONGODB_BACKEND_SETTINGS = None + MongoBackend(app=self.app) + + def test_restore_group_no_entry(self): + x = MongoBackend(app=self.app) + x.collection = Mock() + fo = x.collection.find_one = Mock() + fo.return_value = None + self.assertIsNone(x._restore_group('1f3fab')) + + @depends_on_current_app + def test_reduce(self): + x = MongoBackend(app=self.app) + self.assertTrue(loads(dumps(x))) + + def test_get_connection_connection_exists(self): + + with patch('pymongo.MongoClient') as mock_Connection: + self.backend._connection = sentinel._connection + + connection = self.backend._get_connection() + + self.assertEqual(sentinel._connection, connection) + self.assertFalse(mock_Connection.called) + + def test_get_connection_no_connection_host(self): + + with patch('pymongo.MongoClient') as mock_Connection: + self.backend._connection = None + self.backend.host = MONGODB_HOST + self.backend.port = MONGODB_PORT + mock_Connection.return_value = sentinel.connection + + connection = self.backend._get_connection() + mock_Connection.assert_called_once_with( + host='mongodb://localhost:27017', max_pool_size=10, + auto_start_request=False) + self.assertEqual(sentinel.connection, connection) + + def test_get_connection_no_connection_mongodb_uri(self): + + with patch('pymongo.MongoClient') as mock_Connection: + mongodb_uri = 'mongodb://%s:%d' % (MONGODB_HOST, MONGODB_PORT) + self.backend._connection = None + self.backend.host = mongodb_uri + + mock_Connection.return_value = sentinel.connection + + connection = self.backend._get_connection() + mock_Connection.assert_called_once_with( + host=mongodb_uri, max_pool_size=10, + auto_start_request=False) + self.assertEqual(sentinel.connection, connection) + + @patch('celery.backends.mongodb.MongoBackend._get_connection') + def test_get_database_no_existing(self, mock_get_connection): + # Should really check for combinations of these two, to be complete. 
+ self.backend.user = MONGODB_USER + self.backend.password = MONGODB_PASSWORD + + mock_database = Mock() + mock_connection = MagicMock(spec=['__getitem__']) + mock_connection.__getitem__.return_value = mock_database + mock_get_connection.return_value = mock_connection + + database = self.backend.database + + self.assertTrue(database is mock_database) + self.assertTrue(self.backend.__dict__['database'] is mock_database) + mock_database.authenticate.assert_called_once_with( + MONGODB_USER, MONGODB_PASSWORD) + + @patch('celery.backends.mongodb.MongoBackend._get_connection') + def test_get_database_no_existing_no_auth(self, mock_get_connection): + # Should really check for combinations of these two, to be complete. + self.backend.user = None + self.backend.password = None + + mock_database = Mock() + mock_connection = MagicMock(spec=['__getitem__']) + mock_connection.__getitem__.return_value = mock_database + mock_get_connection.return_value = mock_connection + + database = self.backend.database + + self.assertTrue(database is mock_database) + self.assertFalse(mock_database.authenticate.called) + self.assertTrue(self.backend.__dict__['database'] is mock_database) + + def test_process_cleanup(self): + self.backend._connection = None + self.backend.process_cleanup() + self.assertEqual(self.backend._connection, None) + + self.backend._connection = 'not none' + self.backend.process_cleanup() + self.assertEqual(self.backend._connection, None) + + @patch('celery.backends.mongodb.MongoBackend._get_database') + def test_store_result(self, mock_get_database): + self.backend.taskmeta_collection = MONGODB_COLLECTION + + mock_database = MagicMock(spec=['__getitem__', '__setitem__']) + mock_collection = Mock() + + mock_get_database.return_value = mock_database + mock_database.__getitem__.return_value = mock_collection + + ret_val = self.backend._store_result( + sentinel.task_id, sentinel.result, sentinel.status) + + mock_get_database.assert_called_once_with() + mock_database.__getitem__.assert_called_once_with(MONGODB_COLLECTION) + mock_collection.save.assert_called_once_with(ANY) + self.assertEqual(sentinel.result, ret_val) + + @patch('celery.backends.mongodb.MongoBackend._get_database') + def test_get_task_meta_for(self, mock_get_database): + datetime.datetime = self._reset['datetime'] + self.backend.taskmeta_collection = MONGODB_COLLECTION + + mock_database = MagicMock(spec=['__getitem__', '__setitem__']) + mock_collection = Mock() + mock_collection.find_one.return_value = MagicMock() + + mock_get_database.return_value = mock_database + mock_database.__getitem__.return_value = mock_collection + + ret_val = self.backend._get_task_meta_for(sentinel.task_id) + + mock_get_database.assert_called_once_with() + mock_database.__getitem__.assert_called_once_with(MONGODB_COLLECTION) + self.assertEqual( + list(sorted(['status', 'task_id', 'date_done', 'traceback', + 'result', 'children'])), + list(sorted(ret_val.keys())), + ) + + @patch('celery.backends.mongodb.MongoBackend._get_database') + def test_get_task_meta_for_no_result(self, mock_get_database): + self.backend.taskmeta_collection = MONGODB_COLLECTION + + mock_database = MagicMock(spec=['__getitem__', '__setitem__']) + mock_collection = Mock() + mock_collection.find_one.return_value = None + + mock_get_database.return_value = mock_database + mock_database.__getitem__.return_value = mock_collection + + ret_val = self.backend._get_task_meta_for(sentinel.task_id) + + mock_get_database.assert_called_once_with() + 
mock_database.__getitem__.assert_called_once_with(MONGODB_COLLECTION) + self.assertEqual({'status': states.PENDING, 'result': None}, ret_val) + + @patch('celery.backends.mongodb.MongoBackend._get_database') + def test_save_group(self, mock_get_database): + self.backend.taskmeta_collection = MONGODB_COLLECTION + + mock_database = MagicMock(spec=['__getitem__', '__setitem__']) + mock_collection = Mock() + + mock_get_database.return_value = mock_database + mock_database.__getitem__.return_value = mock_collection + + ret_val = self.backend._save_group( + sentinel.taskset_id, sentinel.result) + + mock_get_database.assert_called_once_with() + mock_database.__getitem__.assert_called_once_with(MONGODB_COLLECTION) + mock_collection.save.assert_called_once_with(ANY) + self.assertEqual(sentinel.result, ret_val) + + @patch('celery.backends.mongodb.MongoBackend._get_database') + def test_restore_group(self, mock_get_database): + self.backend.taskmeta_collection = MONGODB_COLLECTION + + mock_database = MagicMock(spec=['__getitem__', '__setitem__']) + mock_collection = Mock() + mock_collection.find_one.return_value = MagicMock() + + mock_get_database.return_value = mock_database + mock_database.__getitem__.return_value = mock_collection + + ret_val = self.backend._restore_group(sentinel.taskset_id) + + mock_get_database.assert_called_once_with() + mock_database.__getitem__.assert_called_once_with(MONGODB_COLLECTION) + mock_collection.find_one.assert_called_once_with( + {'_id': sentinel.taskset_id}) + self.assertEqual( + ['date_done', 'result', 'task_id'], + list(ret_val.keys()), + ) + + @patch('celery.backends.mongodb.MongoBackend._get_database') + def test_delete_group(self, mock_get_database): + self.backend.taskmeta_collection = MONGODB_COLLECTION + + mock_database = MagicMock(spec=['__getitem__', '__setitem__']) + mock_collection = Mock() + + mock_get_database.return_value = mock_database + mock_database.__getitem__.return_value = mock_collection + + self.backend._delete_group(sentinel.taskset_id) + + mock_get_database.assert_called_once_with() + mock_database.__getitem__.assert_called_once_with(MONGODB_COLLECTION) + mock_collection.remove.assert_called_once_with( + {'_id': sentinel.taskset_id}) + + @patch('celery.backends.mongodb.MongoBackend._get_database') + def test_forget(self, mock_get_database): + self.backend.taskmeta_collection = MONGODB_COLLECTION + + mock_database = MagicMock(spec=['__getitem__', '__setitem__']) + mock_collection = Mock() + + mock_get_database.return_value = mock_database + mock_database.__getitem__.return_value = mock_collection + + self.backend._forget(sentinel.task_id) + + mock_get_database.assert_called_once_with() + mock_database.__getitem__.assert_called_once_with( + MONGODB_COLLECTION) + mock_collection.remove.assert_called_once_with( + {'_id': sentinel.task_id}) + + @patch('celery.backends.mongodb.MongoBackend._get_database') + def test_cleanup(self, mock_get_database): + datetime.datetime = self._reset['datetime'] + self.backend.taskmeta_collection = MONGODB_COLLECTION + + mock_database = MagicMock(spec=['__getitem__', '__setitem__']) + self.backend.collections = mock_collection = Mock() + + mock_get_database.return_value = mock_database + mock_database.__getitem__.return_value = mock_collection + + self.backend.app.now = datetime.datetime.utcnow + self.backend.cleanup() + + mock_get_database.assert_called_once_with() + mock_database.__getitem__.assert_called_once_with( + MONGODB_COLLECTION) + self.assertTrue(mock_collection.remove.called) + + def 
test_get_database_authfailure(self): + x = MongoBackend(app=self.app) + x._get_connection = Mock() + conn = x._get_connection.return_value = {} + db = conn[x.database_name] = Mock() + db.authenticate.return_value = False + x.user = 'jerry' + x.password = 'cere4l' + with self.assertRaises(ImproperlyConfigured): + x._get_database() + db.authenticate.assert_called_with('jerry', 'cere4l') diff --git a/celery/tests/backends/test_redis.py b/celery/tests/backends/test_redis.py new file mode 100644 index 0000000..0ecc525 --- /dev/null +++ b/celery/tests/backends/test_redis.py @@ -0,0 +1,275 @@ +from __future__ import absolute_import + +from datetime import timedelta + +from pickle import loads, dumps + +from celery import signature +from celery import states +from celery import group +from celery import uuid +from celery.datastructures import AttributeDict +from celery.exceptions import ImproperlyConfigured +from celery.utils.timeutils import timedelta_seconds + +from celery.tests.case import ( + AppCase, Mock, MockCallbacks, SkipTest, depends_on_current_app, patch, +) + + +class Connection(object): + connected = True + + def disconnect(self): + self.connected = False + + +class Pipeline(object): + + def __init__(self, client): + self.client = client + self.steps = [] + + def __getattr__(self, attr): + + def add_step(*args, **kwargs): + self.steps.append((getattr(self.client, attr), args, kwargs)) + return self + return add_step + + def execute(self): + return [step(*a, **kw) for step, a, kw in self.steps] + + +class Redis(MockCallbacks): + Connection = Connection + Pipeline = Pipeline + + def __init__(self, host=None, port=None, db=None, password=None, **kw): + self.host = host + self.port = port + self.db = db + self.password = password + self.keyspace = {} + self.expiry = {} + self.connection = self.Connection() + + def get(self, key): + return self.keyspace.get(key) + + def setex(self, key, value, expires): + self.set(key, value) + self.expire(key, expires) + + def set(self, key, value): + self.keyspace[key] = value + + def expire(self, key, expires): + self.expiry[key] = expires + return expires + + def delete(self, key): + return bool(self.keyspace.pop(key, None)) + + def pipeline(self): + return self.Pipeline(self) + + def _get_list(self, key): + try: + return self.keyspace[key] + except KeyError: + l = self.keyspace[key] = [] + return l + + def rpush(self, key, value): + self._get_list(key).append(value) + + def lrange(self, key, start, stop): + return self._get_list(key)[start:stop] + + def llen(self, key): + return len(self.keyspace.get(key) or []) + + +class redis(object): + Redis = Redis + + class ConnectionPool(object): + + def __init__(self, **kwargs): + pass + + class UnixDomainSocketConnection(object): + + def __init__(self, **kwargs): + pass + + +class test_RedisBackend(AppCase): + + def get_backend(self): + from celery.backends.redis import RedisBackend + + class _RedisBackend(RedisBackend): + redis = redis + + return _RedisBackend + + def setup(self): + self.Backend = self.get_backend() + + @depends_on_current_app + def test_reduce(self): + try: + from celery.backends.redis import RedisBackend + x = RedisBackend(app=self.app, new_join=True) + self.assertTrue(loads(dumps(x))) + except ImportError: + raise SkipTest('redis not installed') + + def test_no_redis(self): + self.Backend.redis = None + with self.assertRaises(ImproperlyConfigured): + self.Backend(app=self.app, new_join=True) + + def test_url(self): + x = self.Backend( + 'redis://:bosco@vandelay.com:123//1', app=self.app, 
new_join=True, + ) + self.assertTrue(x.connparams) + self.assertEqual(x.connparams['host'], 'vandelay.com') + self.assertEqual(x.connparams['db'], 1) + self.assertEqual(x.connparams['port'], 123) + self.assertEqual(x.connparams['password'], 'bosco') + + def test_socket_url(self): + x = self.Backend( + 'socket:///tmp/redis.sock?virtual_host=/3', app=self.app, + new_join=True, + ) + self.assertTrue(x.connparams) + self.assertEqual(x.connparams['path'], '/tmp/redis.sock') + self.assertIs( + x.connparams['connection_class'], + redis.UnixDomainSocketConnection, + ) + self.assertNotIn('host', x.connparams) + self.assertNotIn('port', x.connparams) + self.assertEqual(x.connparams['db'], 3) + + def test_compat_properties(self): + x = self.Backend( + 'redis://:bosco@vandelay.com:123//1', app=self.app, + new_join=True, + ) + with self.assertPendingDeprecation(): + self.assertEqual(x.host, 'vandelay.com') + with self.assertPendingDeprecation(): + self.assertEqual(x.db, 1) + with self.assertPendingDeprecation(): + self.assertEqual(x.port, 123) + with self.assertPendingDeprecation(): + self.assertEqual(x.password, 'bosco') + + def test_conf_raises_KeyError(self): + self.app.conf = AttributeDict({ + 'CELERY_RESULT_SERIALIZER': 'json', + 'CELERY_MAX_CACHED_RESULTS': 1, + 'CELERY_ACCEPT_CONTENT': ['json'], + 'CELERY_TASK_RESULT_EXPIRES': None, + }) + self.Backend(app=self.app, new_join=True) + + def test_expires_defaults_to_config(self): + self.app.conf.CELERY_TASK_RESULT_EXPIRES = 10 + b = self.Backend(expires=None, app=self.app, new_join=True) + self.assertEqual(b.expires, 10) + + def test_expires_is_int(self): + b = self.Backend(expires=48, app=self.app, new_join=True) + self.assertEqual(b.expires, 48) + + def test_set_new_join_from_url_query(self): + b = self.Backend('redis://?new_join=True;foobar=1', app=self.app) + self.assertEqual(b.on_chord_part_return, b._new_chord_return) + self.assertEqual(b.apply_chord, b._new_chord_apply) + + def test_default_is_old_join(self): + b = self.Backend(app=self.app) + self.assertNotEqual(b.on_chord_part_return, b._new_chord_return) + self.assertNotEqual(b.apply_chord, b._new_chord_apply) + + def test_expires_is_None(self): + b = self.Backend(expires=None, app=self.app, new_join=True) + self.assertEqual(b.expires, timedelta_seconds( + self.app.conf.CELERY_TASK_RESULT_EXPIRES)) + + def test_expires_is_timedelta(self): + b = self.Backend( + expires=timedelta(minutes=1), app=self.app, new_join=1, + ) + self.assertEqual(b.expires, 60) + + def test_apply_chord(self): + self.Backend(app=self.app, new_join=True).apply_chord( + group(app=self.app), (), 'group_id', {}, + result=[self.app.AsyncResult(x) for x in [1, 2, 3]], + ) + + def test_mget(self): + b = self.Backend(app=self.app, new_join=True) + self.assertTrue(b.mget(['a', 'b', 'c'])) + b.client.mget.assert_called_with(['a', 'b', 'c']) + + def test_set_no_expire(self): + b = self.Backend(app=self.app, new_join=True) + b.expires = None + b.set('foo', 'bar') + + @patch('celery.result.GroupResult.restore') + def test_on_chord_part_return(self, restore): + b = self.Backend(app=self.app, new_join=True) + + def create_task(): + tid = uuid() + task = Mock(name='task-{0}'.format(tid)) + task.name = 'foobarbaz' + self.app.tasks['foobarbaz'] = task + task.request.chord = signature(task) + task.request.id = tid + task.request.chord['chord_size'] = 10 + task.request.group = 'group_id' + return task + + tasks = [create_task() for i in range(10)] + + for i in range(10): + b.on_chord_part_return(tasks[i], states.SUCCESS, i) + 
self.assertTrue(b.client.rpush.call_count) + b.client.rpush.reset_mock() + self.assertTrue(b.client.lrange.call_count) + gkey = b.get_key_for_group('group_id', '.j') + b.client.delete.assert_called_with(gkey) + b.client.expire.assert_called_with(gkey, 86400) + + def test_process_cleanup(self): + self.Backend(app=self.app, new_join=True).process_cleanup() + + def test_get_set_forget(self): + b = self.Backend(app=self.app, new_join=True) + tid = uuid() + b.store_result(tid, 42, states.SUCCESS) + self.assertEqual(b.get_status(tid), states.SUCCESS) + self.assertEqual(b.get_result(tid), 42) + b.forget(tid) + self.assertEqual(b.get_status(tid), states.PENDING) + + def test_set_expires(self): + b = self.Backend(expires=512, app=self.app, new_join=True) + tid = uuid() + key = b.get_key_for_task(tid) + b.store_result(tid, 42, states.SUCCESS) + b.client.expire.assert_called_with( + key, 512, + ) diff --git a/celery/tests/backends/test_rpc.py b/celery/tests/backends/test_rpc.py new file mode 100644 index 0000000..6fe594c --- /dev/null +++ b/celery/tests/backends/test_rpc.py @@ -0,0 +1,75 @@ +from __future__ import absolute_import + +from celery.backends.rpc import RPCBackend +from celery._state import _task_stack + +from celery.tests.case import AppCase, Mock, patch + + +class test_RPCBackend(AppCase): + + def setup(self): + self.b = RPCBackend(app=self.app) + + def test_oid(self): + oid = self.b.oid + oid2 = self.b.oid + self.assertEqual(oid, oid2) + self.assertEqual(oid, self.app.oid) + + def test_interface(self): + self.b.on_reply_declare('task_id') + + def test_destination_for(self): + req = Mock(name='request') + req.reply_to = 'reply_to' + req.correlation_id = 'corid' + self.assertTupleEqual( + self.b.destination_for('task_id', req), + ('reply_to', 'corid'), + ) + task = Mock() + _task_stack.push(task) + try: + task.request.reply_to = 'reply_to' + task.request.correlation_id = 'corid' + self.assertTupleEqual( + self.b.destination_for('task_id', None), + ('reply_to', 'corid'), + ) + finally: + _task_stack.pop() + + with self.assertRaises(RuntimeError): + self.b.destination_for('task_id', None) + + def test_binding(self): + queue = self.b.binding + self.assertEqual(queue.name, self.b.oid) + self.assertEqual(queue.exchange, self.b.exchange) + self.assertEqual(queue.routing_key, self.b.oid) + self.assertFalse(queue.durable) + self.assertFalse(queue.auto_delete) + + def test_many_bindings(self): + self.assertListEqual( + self.b._many_bindings(['a', 'b']), + [self.b.binding], + ) + + def test_create_binding(self): + self.assertEqual(self.b._create_binding('id'), self.b.binding) + + def test_on_task_call(self): + with patch('celery.backends.rpc.maybe_declare') as md: + with self.app.amqp.producer_pool.acquire() as prod: + self.b.on_task_call(prod, 'task_id'), + md.assert_called_with( + self.b.binding(prod.channel), + retry=True, + ) + + def test_create_exchange(self): + ex = self.b._create_exchange('name') + self.assertIsInstance(ex, self.b.Exchange) + self.assertEqual(ex.name, '') diff --git a/celery/tests/bin/__init__.py b/celery/tests/bin/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/tests/bin/proj/__init__.py b/celery/tests/bin/proj/__init__.py new file mode 100644 index 0000000..ffe8fb0 --- /dev/null +++ b/celery/tests/bin/proj/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import + +from celery import Celery + +hello = Celery(set_as_current=False) diff --git a/celery/tests/bin/proj/app.py b/celery/tests/bin/proj/app.py new file mode 100644 index 
0000000..f1fb15e --- /dev/null +++ b/celery/tests/bin/proj/app.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import + +from celery import Celery + +app = Celery(set_as_current=False) diff --git a/celery/tests/bin/test_amqp.py b/celery/tests/bin/test_amqp.py new file mode 100644 index 0000000..8840a9f --- /dev/null +++ b/celery/tests/bin/test_amqp.py @@ -0,0 +1,153 @@ +from __future__ import absolute_import + +from celery.bin.amqp import ( + AMQPAdmin, + AMQShell, + dump_message, + amqp, + main, +) + +from celery.tests.case import AppCase, Mock, WhateverIO, patch + + +class test_AMQShell(AppCase): + + def setup(self): + self.fh = WhateverIO() + self.adm = self.create_adm() + self.shell = AMQShell(connect=self.adm.connect, out=self.fh) + + def create_adm(self, *args, **kwargs): + return AMQPAdmin(app=self.app, out=self.fh, *args, **kwargs) + + def test_queue_declare(self): + self.shell.onecmd('queue.declare foo') + self.assertIn('ok', self.fh.getvalue()) + + def test_missing_command(self): + self.shell.onecmd('foo foo') + self.assertIn('unknown syntax', self.fh.getvalue()) + + def RV(self): + raise Exception(self.fh.getvalue()) + + def test_spec_format_response(self): + spec = self.shell.amqp['exchange.declare'] + self.assertEqual(spec.format_response(None), 'ok.') + self.assertEqual(spec.format_response('NO'), 'NO') + + def test_missing_namespace(self): + self.shell.onecmd('ns.cmd arg') + self.assertIn('unknown syntax', self.fh.getvalue()) + + def test_help(self): + self.shell.onecmd('help') + self.assertIn('Example:', self.fh.getvalue()) + + def test_help_command(self): + self.shell.onecmd('help queue.declare') + self.assertIn('passive:no', self.fh.getvalue()) + + def test_help_unknown_command(self): + self.shell.onecmd('help foo.baz') + self.assertIn('unknown syntax', self.fh.getvalue()) + + def test_onecmd_error(self): + self.shell.dispatch = Mock() + self.shell.dispatch.side_effect = MemoryError() + self.shell.say = Mock() + self.assertFalse(self.shell.needs_reconnect) + self.shell.onecmd('hello') + self.assertTrue(self.shell.say.called) + self.assertTrue(self.shell.needs_reconnect) + + def test_exit(self): + with self.assertRaises(SystemExit): + self.shell.onecmd('exit') + self.assertIn("don't leave!", self.fh.getvalue()) + + def test_note_silent(self): + self.shell.silent = True + self.shell.note('foo bar') + self.assertNotIn('foo bar', self.fh.getvalue()) + + def test_reconnect(self): + self.shell.onecmd('queue.declare foo') + self.shell.needs_reconnect = True + self.shell.onecmd('queue.delete foo') + + def test_completenames(self): + self.assertEqual( + self.shell.completenames('queue.dec'), + ['queue.declare'], + ) + self.assertEqual( + sorted(self.shell.completenames('declare')), + sorted(['queue.declare', 'exchange.declare']), + ) + + def test_empty_line(self): + self.shell.emptyline = Mock() + self.shell.default = Mock() + self.shell.onecmd('') + self.shell.emptyline.assert_called_with() + self.shell.onecmd('foo') + self.shell.default.assert_called_with('foo') + + def test_respond(self): + self.shell.respond({'foo': 'bar'}) + self.assertIn('foo', self.fh.getvalue()) + + def test_prompt(self): + self.assertTrue(self.shell.prompt) + + def test_no_returns(self): + self.shell.onecmd('queue.declare foo') + self.shell.onecmd('exchange.declare bar direct yes') + self.shell.onecmd('queue.bind foo bar baz') + self.shell.onecmd('basic.ack 1') + + def test_dump_message(self): + m = Mock() + m.body = 'the quick brown fox' + m.properties = {'a': 1} + m.delivery_info = {'exchange': 
'bar'} + self.assertTrue(dump_message(m)) + + def test_dump_message_no_message(self): + self.assertIn('No messages in queue', dump_message(None)) + + def test_note(self): + self.adm.silent = True + self.adm.note('FOO') + self.assertNotIn('FOO', self.fh.getvalue()) + + def test_run(self): + a = self.create_adm('queue.declare foo') + a.run() + self.assertIn('ok', self.fh.getvalue()) + + def test_run_loop(self): + a = self.create_adm() + a.Shell = Mock() + shell = a.Shell.return_value = Mock() + shell.cmdloop = Mock() + a.run() + shell.cmdloop.assert_called_with() + + shell.cmdloop.side_effect = KeyboardInterrupt() + a.run() + self.assertIn('bibi', self.fh.getvalue()) + + @patch('celery.bin.amqp.amqp') + def test_main(self, Command): + c = Command.return_value = Mock() + main() + c.execute_from_commandline.assert_called_with() + + @patch('celery.bin.amqp.AMQPAdmin') + def test_command(self, cls): + x = amqp(app=self.app) + x.run() + self.assertIs(cls.call_args[1]['app'], self.app) diff --git a/celery/tests/bin/test_base.py b/celery/tests/bin/test_base.py new file mode 100644 index 0000000..8d1d0d5 --- /dev/null +++ b/celery/tests/bin/test_base.py @@ -0,0 +1,316 @@ +from __future__ import absolute_import + +import os + +from celery.bin.base import ( + Command, + Option, + Extensions, + HelpFormatter, +) +from celery.tests.case import ( + AppCase, Mock, depends_on_current_app, override_stdouts, patch, +) + + +class Object(object): + pass + + +class MyApp(object): + user_options = {'preload': None} + +APP = MyApp() # <-- Used by test_with_custom_app + + +class MockCommand(Command): + mock_args = ('arg1', 'arg2', 'arg3') + + def parse_options(self, prog_name, arguments, command=None): + options = Object() + options.foo = 'bar' + options.prog_name = prog_name + return options, self.mock_args + + def run(self, *args, **kwargs): + return args, kwargs + + +class test_Extensions(AppCase): + + def test_load(self): + with patch('pkg_resources.iter_entry_points') as iterep: + with patch('celery.bin.base.symbol_by_name') as symbyname: + ep = Mock() + ep.name = 'ep' + ep.module_name = 'foo' + ep.attrs = ['bar', 'baz'] + iterep.return_value = [ep] + cls = symbyname.return_value = Mock() + register = Mock() + e = Extensions('unit', register) + e.load() + symbyname.assert_called_with('foo:bar') + register.assert_called_with(cls, name='ep') + + with patch('celery.bin.base.symbol_by_name') as symbyname: + symbyname.side_effect = SyntaxError() + with patch('warnings.warn') as warn: + e.load() + self.assertTrue(warn.called) + + with patch('celery.bin.base.symbol_by_name') as symbyname: + symbyname.side_effect = KeyError('foo') + with self.assertRaises(KeyError): + e.load() + + +class test_HelpFormatter(AppCase): + + def test_format_epilog(self): + f = HelpFormatter() + self.assertTrue(f.format_epilog('hello')) + self.assertFalse(f.format_epilog('')) + + def test_format_description(self): + f = HelpFormatter() + self.assertTrue(f.format_description('hello')) + + +class test_Command(AppCase): + + def test_get_options(self): + cmd = Command() + cmd.option_list = (1, 2, 3) + self.assertTupleEqual(cmd.get_options(), (1, 2, 3)) + + def test_custom_description(self): + + class C(Command): + description = 'foo' + + c = C() + self.assertEqual(c.description, 'foo') + + def test_register_callbacks(self): + c = Command(on_error=8, on_usage_error=9) + self.assertEqual(c.on_error, 8) + self.assertEqual(c.on_usage_error, 9) + + def test_run_raises_UsageError(self): + cb = Mock() + c = Command(on_usage_error=cb) + c.verify_args 
= Mock() + c.run = Mock() + exc = c.run.side_effect = c.UsageError('foo', status=3) + + self.assertEqual(c(), exc.status) + cb.assert_called_with(exc) + c.verify_args.assert_called_with(()) + + def test_default_on_usage_error(self): + cmd = Command() + cmd.handle_error = Mock() + exc = Exception() + cmd.on_usage_error(exc) + cmd.handle_error.assert_called_with(exc) + + def test_verify_args_missing(self): + c = Command() + + def run(a, b, c): + pass + c.run = run + + with self.assertRaises(c.UsageError): + c.verify_args((1, )) + c.verify_args((1, 2, 3)) + + def test_run_interface(self): + with self.assertRaises(NotImplementedError): + Command().run() + + @patch('sys.stdout') + def test_early_version(self, stdout): + cmd = Command() + with self.assertRaises(SystemExit): + cmd.early_version(['--version']) + + def test_execute_from_commandline(self): + cmd = MockCommand(app=self.app) + args1, kwargs1 = cmd.execute_from_commandline() # sys.argv + self.assertTupleEqual(args1, cmd.mock_args) + self.assertDictContainsSubset({'foo': 'bar'}, kwargs1) + self.assertTrue(kwargs1.get('prog_name')) + args2, kwargs2 = cmd.execute_from_commandline(['foo']) # pass list + self.assertTupleEqual(args2, cmd.mock_args) + self.assertDictContainsSubset({'foo': 'bar', 'prog_name': 'foo'}, + kwargs2) + + def test_with_bogus_args(self): + with override_stdouts() as (_, stderr): + cmd = MockCommand(app=self.app) + cmd.supports_args = False + with self.assertRaises(SystemExit): + cmd.execute_from_commandline(argv=['--bogus']) + self.assertTrue(stderr.getvalue()) + self.assertIn('Unrecognized', stderr.getvalue()) + + def test_with_custom_config_module(self): + prev = os.environ.pop('CELERY_CONFIG_MODULE', None) + try: + cmd = MockCommand(app=self.app) + cmd.setup_app_from_commandline(['--config=foo.bar.baz']) + self.assertEqual(os.environ.get('CELERY_CONFIG_MODULE'), + 'foo.bar.baz') + finally: + if prev: + os.environ['CELERY_CONFIG_MODULE'] = prev + else: + os.environ.pop('CELERY_CONFIG_MODULE', None) + + def test_with_custom_broker(self): + prev = os.environ.pop('CELERY_BROKER_URL', None) + try: + cmd = MockCommand(app=self.app) + cmd.setup_app_from_commandline(['--broker=xyzza://']) + self.assertEqual( + os.environ.get('CELERY_BROKER_URL'), 'xyzza://', + ) + finally: + if prev: + os.environ['CELERY_BROKER_URL'] = prev + else: + os.environ.pop('CELERY_BROKER_URL', None) + + def test_with_custom_app(self): + cmd = MockCommand(app=self.app) + app = '.'.join([__name__, 'APP']) + cmd.setup_app_from_commandline(['--app=%s' % (app, ), + '--loglevel=INFO']) + self.assertIs(cmd.app, APP) + cmd.setup_app_from_commandline(['-A', app, + '--loglevel=INFO']) + self.assertIs(cmd.app, APP) + + def test_setup_app_sets_quiet(self): + cmd = MockCommand(app=self.app) + cmd.setup_app_from_commandline(['-q']) + self.assertTrue(cmd.quiet) + cmd2 = MockCommand(app=self.app) + cmd2.setup_app_from_commandline(['--quiet']) + self.assertTrue(cmd2.quiet) + + def test_setup_app_sets_chdir(self): + with patch('os.chdir') as chdir: + cmd = MockCommand(app=self.app) + cmd.setup_app_from_commandline(['--workdir=/opt']) + chdir.assert_called_with('/opt') + + def test_setup_app_sets_loader(self): + prev = os.environ.get('CELERY_LOADER') + try: + cmd = MockCommand(app=self.app) + cmd.setup_app_from_commandline(['--loader=X.Y:Z']) + self.assertEqual(os.environ['CELERY_LOADER'], 'X.Y:Z') + finally: + if prev is not None: + os.environ['CELERY_LOADER'] = prev + + def test_setup_app_no_respect(self): + cmd = MockCommand(app=self.app) + 
cmd.respects_app_option = False + with patch('celery.bin.base.Celery') as cp: + cmd.setup_app_from_commandline(['--app=x.y:z']) + self.assertTrue(cp.called) + + def test_setup_app_custom_app(self): + cmd = MockCommand(app=self.app) + app = cmd.app = Mock() + app.user_options = {'preload': None} + cmd.setup_app_from_commandline([]) + self.assertEqual(cmd.app, app) + + def test_find_app_suspects(self): + cmd = MockCommand(app=self.app) + self.assertTrue(cmd.find_app('celery.tests.bin.proj.app')) + self.assertTrue(cmd.find_app('celery.tests.bin.proj')) + self.assertTrue(cmd.find_app('celery.tests.bin.proj:hello')) + self.assertTrue(cmd.find_app('celery.tests.bin.proj.app:app')) + + with self.assertRaises(AttributeError): + cmd.find_app(__name__) + + def test_host_format(self): + cmd = MockCommand(app=self.app) + with patch('socket.gethostname') as hn: + hn.return_value = 'blacktron.example.com' + self.assertEqual(cmd.host_format(''), '') + self.assertEqual( + cmd.host_format('celery@%h'), + 'celery@blacktron.example.com', + ) + self.assertEqual( + cmd.host_format('celery@%d'), + 'celery@example.com', + ) + self.assertEqual( + cmd.host_format('celery@%n'), + 'celery@blacktron', + ) + + def test_say_chat_quiet(self): + cmd = MockCommand(app=self.app) + cmd.quiet = True + self.assertIsNone(cmd.say_chat('<-', 'foo', 'foo')) + + def test_say_chat_show_body(self): + cmd = MockCommand(app=self.app) + cmd.out = Mock() + cmd.show_body = True + cmd.say_chat('->', 'foo', 'body') + cmd.out.assert_called_with('body') + + def test_say_chat_no_body(self): + cmd = MockCommand(app=self.app) + cmd.out = Mock() + cmd.show_body = False + cmd.say_chat('->', 'foo', 'body') + + @depends_on_current_app + def test_with_cmdline_config(self): + cmd = MockCommand(app=self.app) + cmd.enable_config_from_cmdline = True + cmd.namespace = 'celeryd' + rest = cmd.setup_app_from_commandline(argv=[ + '--loglevel=INFO', '--', + 'broker.url=amqp://broker.example.com', + '.prefetch_multiplier=100']) + self.assertEqual(cmd.app.conf.BROKER_URL, + 'amqp://broker.example.com') + self.assertEqual(cmd.app.conf.CELERYD_PREFETCH_MULTIPLIER, 100) + self.assertListEqual(rest, ['--loglevel=INFO']) + + def test_find_app(self): + cmd = MockCommand(app=self.app) + with patch('celery.bin.base.symbol_by_name') as sbn: + from types import ModuleType + x = ModuleType('proj') + + def on_sbn(*args, **kwargs): + + def after(*args, **kwargs): + x.app = 'quick brown fox' + x.__path__ = None + return x + sbn.side_effect = after + return x + sbn.side_effect = on_sbn + x.__path__ = [True] + self.assertEqual(cmd.find_app('proj'), 'quick brown fox') + + def test_parse_preload_options_shortopt(self): + cmd = Command() + cmd.preload_options = (Option('-s', action='store', dest='silent'), ) + acc = cmd.parse_preload_options(['-s', 'yes']) + self.assertEqual(acc.get('silent'), 'yes') diff --git a/celery/tests/bin/test_beat.py b/celery/tests/bin/test_beat.py new file mode 100644 index 0000000..45a7438 --- /dev/null +++ b/celery/tests/bin/test_beat.py @@ -0,0 +1,196 @@ +from __future__ import absolute_import + +import logging +import sys + +from collections import defaultdict + +from celery import beat +from celery import platforms +from celery.bin import beat as beat_bin +from celery.apps import beat as beatapp + +from celery.tests.case import AppCase, Mock, patch, restore_logging +from kombu.tests.case import redirect_stdouts + + +class MockedShelveModule(object): + shelves = defaultdict(lambda: {}) + + def open(self, filename, *args, **kwargs): + return 
self.shelves[filename] +mocked_shelve = MockedShelveModule() + + +class MockService(beat.Service): + started = False + in_sync = False + persistence = mocked_shelve + + def start(self): + self.__class__.started = True + + def sync(self): + self.__class__.in_sync = True + + +class MockBeat(beatapp.Beat): + running = False + + def run(self): + MockBeat.running = True + + +class MockBeat2(beatapp.Beat): + Service = MockService + + def install_sync_handler(self, b): + pass + + +class MockBeat3(beatapp.Beat): + Service = MockService + + def install_sync_handler(self, b): + raise TypeError('xxx') + + +class test_Beat(AppCase): + + def test_loglevel_string(self): + b = beatapp.Beat(app=self.app, loglevel='DEBUG', + redirect_stdouts=False) + self.assertEqual(b.loglevel, logging.DEBUG) + + b2 = beatapp.Beat(app=self.app, loglevel=logging.DEBUG, + redirect_stdouts=False) + self.assertEqual(b2.loglevel, logging.DEBUG) + + def test_colorize(self): + self.app.log.setup = Mock() + b = beatapp.Beat(app=self.app, no_color=True, + redirect_stdouts=False) + b.setup_logging() + self.assertTrue(self.app.log.setup.called) + self.assertEqual(self.app.log.setup.call_args[1]['colorize'], False) + + def test_init_loader(self): + b = beatapp.Beat(app=self.app, redirect_stdouts=False) + b.init_loader() + + def test_process_title(self): + b = beatapp.Beat(app=self.app, redirect_stdouts=False) + b.set_process_title() + + def test_run(self): + b = MockBeat2(app=self.app, redirect_stdouts=False) + MockService.started = False + b.run() + self.assertTrue(MockService.started) + + def psig(self, fun, *args, **kwargs): + handlers = {} + + class Signals(platforms.Signals): + + def __setitem__(self, sig, handler): + handlers[sig] = handler + + p, platforms.signals = platforms.signals, Signals() + try: + fun(*args, **kwargs) + return handlers + finally: + platforms.signals = p + + def test_install_sync_handler(self): + b = beatapp.Beat(app=self.app, redirect_stdouts=False) + clock = MockService(app=self.app) + MockService.in_sync = False + handlers = self.psig(b.install_sync_handler, clock) + with self.assertRaises(SystemExit): + handlers['SIGINT']('SIGINT', object()) + self.assertTrue(MockService.in_sync) + MockService.in_sync = False + + def test_setup_logging(self): + with restore_logging(): + try: + # py3k + delattr(sys.stdout, 'logger') + except AttributeError: + pass + b = beatapp.Beat(app=self.app, redirect_stdouts=False) + b.redirect_stdouts = False + b.app.log.already_setup = False + b.setup_logging() + with self.assertRaises(AttributeError): + sys.stdout.logger + + @redirect_stdouts + @patch('celery.apps.beat.logger') + def test_logs_errors(self, logger, stdout, stderr): + with restore_logging(): + b = MockBeat3( + app=self.app, redirect_stdouts=False, socket_timeout=None, + ) + b.start_scheduler() + self.assertTrue(logger.critical.called) + + @redirect_stdouts + @patch('celery.platforms.create_pidlock') + def test_use_pidfile(self, create_pidlock, stdout, stderr): + b = MockBeat2(app=self.app, pidfile='pidfilelockfilepid', + socket_timeout=None, redirect_stdouts=False) + b.start_scheduler() + self.assertTrue(create_pidlock.called) + + +class MockDaemonContext(object): + opened = False + closed = False + + def __init__(self, *args, **kwargs): + pass + + def open(self): + self.__class__.opened = True + return self + __enter__ = open + + def close(self, *args): + self.__class__.closed = True + __exit__ = close + + +class test_div(AppCase): + + def setup(self): + self.prev, beatapp.Beat = beatapp.Beat, MockBeat + 
self.ctx, beat_bin.detached = ( + beat_bin.detached, MockDaemonContext, + ) + + def teardown(self): + beatapp.Beat = self.prev + + def test_main(self): + sys.argv = [sys.argv[0], '-s', 'foo'] + try: + beat_bin.main(app=self.app) + self.assertTrue(MockBeat.running) + finally: + MockBeat.running = False + + def test_detach(self): + cmd = beat_bin.beat() + cmd.app = self.app + cmd.run(detach=True) + self.assertTrue(MockDaemonContext.opened) + self.assertTrue(MockDaemonContext.closed) + + def test_parse_options(self): + cmd = beat_bin.beat() + cmd.app = self.app + options, args = cmd.parse_options('celery beat', ['-s', 'foo']) + self.assertEqual(options.schedule, 'foo') diff --git a/celery/tests/bin/test_celery.py b/celery/tests/bin/test_celery.py new file mode 100644 index 0000000..fbfdb62 --- /dev/null +++ b/celery/tests/bin/test_celery.py @@ -0,0 +1,588 @@ +from __future__ import absolute_import + +import sys + +from anyjson import dumps +from datetime import datetime + +from celery import __main__ +from celery.platforms import EX_FAILURE, EX_USAGE, EX_OK +from celery.bin.base import Error +from celery.bin.celery import ( + Command, + list_, + call, + purge, + result, + inspect, + control, + status, + migrate, + help, + report, + CeleryCommand, + determine_exit_status, + multi, + main as mainfun, + _RemoteControl, + command, +) + +from celery.tests.case import ( + AppCase, Mock, WhateverIO, override_stdouts, patch, +) + + +class test__main__(AppCase): + + def test_warn_deprecated(self): + with override_stdouts() as (stdout, _): + __main__._warn_deprecated('YADDA YADDA') + self.assertIn('command is deprecated', stdout.getvalue()) + self.assertIn('YADDA YADDA', stdout.getvalue()) + + def test_main(self): + with patch('celery.__main__.maybe_patch_concurrency') as mpc: + with patch('celery.bin.celery.main') as main: + __main__.main() + mpc.assert_called_with() + main.assert_called_with() + + def test_compat_worker(self): + with patch('celery.__main__.maybe_patch_concurrency') as mpc: + with patch('celery.__main__._warn_deprecated') as depr: + with patch('celery.bin.worker.main') as main: + __main__._compat_worker() + mpc.assert_called_with() + depr.assert_called_with('celery worker') + main.assert_called_with() + + def test_compat_multi(self): + with patch('celery.__main__.maybe_patch_concurrency') as mpc: + with patch('celery.__main__._warn_deprecated') as depr: + with patch('celery.bin.multi.main') as main: + __main__._compat_multi() + self.assertFalse(mpc.called) + depr.assert_called_with('celery multi') + main.assert_called_with() + + def test_compat_beat(self): + with patch('celery.__main__.maybe_patch_concurrency') as mpc: + with patch('celery.__main__._warn_deprecated') as depr: + with patch('celery.bin.beat.main') as main: + __main__._compat_beat() + mpc.assert_called_with() + depr.assert_called_with('celery beat') + main.assert_called_with() + + +class test_Command(AppCase): + + def test_Error_repr(self): + x = Error('something happened') + self.assertIsNotNone(x.status) + self.assertTrue(x.reason) + self.assertTrue(str(x)) + + def setup(self): + self.out = WhateverIO() + self.err = WhateverIO() + self.cmd = Command(self.app, stdout=self.out, stderr=self.err) + + def test_error(self): + self.cmd.out = Mock() + self.cmd.error('FOO') + self.assertTrue(self.cmd.out.called) + + def test_out(self): + f = Mock() + self.cmd.out('foo', f) + + def test_call(self): + + def ok_run(): + pass + + self.cmd.run = ok_run + self.assertEqual(self.cmd(), EX_OK) + + def error_run(): + raise Error('error', 
EX_FAILURE) + self.cmd.run = error_run + self.assertEqual(self.cmd(), EX_FAILURE) + + def test_run_from_argv(self): + with self.assertRaises(NotImplementedError): + self.cmd.run_from_argv('prog', ['foo', 'bar']) + + def test_pretty_list(self): + self.assertEqual(self.cmd.pretty([])[1], '- empty -') + self.assertIn('bar', self.cmd.pretty(['foo', 'bar'])[1]) + + def test_pretty_dict(self): + self.assertIn( + 'OK', + str(self.cmd.pretty({'ok': 'the quick brown fox'})[0]), + ) + self.assertIn( + 'ERROR', + str(self.cmd.pretty({'error': 'the quick brown fox'})[0]), + ) + + def test_pretty(self): + self.assertIn('OK', str(self.cmd.pretty('the quick brown'))) + self.assertIn('OK', str(self.cmd.pretty(object()))) + self.assertIn('OK', str(self.cmd.pretty({'foo': 'bar'}))) + + +class test_list(AppCase): + + def test_list_bindings_no_support(self): + l = list_(app=self.app, stderr=WhateverIO()) + management = Mock() + management.get_bindings.side_effect = NotImplementedError() + with self.assertRaises(Error): + l.list_bindings(management) + + def test_run(self): + l = list_(app=self.app, stderr=WhateverIO()) + l.run('bindings') + + with self.assertRaises(Error): + l.run(None) + + with self.assertRaises(Error): + l.run('foo') + + +class test_call(AppCase): + + def setup(self): + + @self.app.task(shared=False) + def add(x, y): + return x + y + self.add = add + + @patch('celery.app.base.Celery.send_task') + def test_run(self, send_task): + a = call(app=self.app, stderr=WhateverIO(), stdout=WhateverIO()) + a.run(self.add.name) + self.assertTrue(send_task.called) + + a.run(self.add.name, + args=dumps([4, 4]), + kwargs=dumps({'x': 2, 'y': 2})) + self.assertEqual(send_task.call_args[1]['args'], [4, 4]) + self.assertEqual(send_task.call_args[1]['kwargs'], {'x': 2, 'y': 2}) + + a.run(self.add.name, expires=10, countdown=10) + self.assertEqual(send_task.call_args[1]['expires'], 10) + self.assertEqual(send_task.call_args[1]['countdown'], 10) + + now = datetime.now() + iso = now.isoformat() + a.run(self.add.name, expires=iso) + self.assertEqual(send_task.call_args[1]['expires'], now) + with self.assertRaises(ValueError): + a.run(self.add.name, expires='foobaribazibar') + + +class test_purge(AppCase): + + @patch('celery.app.control.Control.purge') + def test_run(self, purge_): + out = WhateverIO() + a = purge(app=self.app, stdout=out) + purge_.return_value = 0 + a.run(force=True) + self.assertIn('No messages purged', out.getvalue()) + + purge_.return_value = 100 + a.run(force=True) + self.assertIn('100 messages', out.getvalue()) + + +class test_result(AppCase): + + def setup(self): + + @self.app.task(shared=False) + def add(x, y): + return x + y + self.add = add + + def test_run(self): + with patch('celery.result.AsyncResult.get') as get: + out = WhateverIO() + r = result(app=self.app, stdout=out) + get.return_value = 'Jerry' + r.run('id') + self.assertIn('Jerry', out.getvalue()) + + get.return_value = 'Elaine' + r.run('id', task=self.add.name) + self.assertIn('Elaine', out.getvalue()) + + with patch('celery.result.AsyncResult.traceback') as tb: + r.run('id', task=self.add.name, traceback=True) + self.assertIn(str(tb), out.getvalue()) + + +class test_status(AppCase): + + @patch('celery.bin.celery.inspect') + def test_run(self, inspect_): + out, err = WhateverIO(), WhateverIO() + ins = inspect_.return_value = Mock() + ins.run.return_value = [] + s = status(self.app, stdout=out, stderr=err) + with self.assertRaises(Error): + s.run() + + ins.run.return_value = ['a', 'b', 'c'] + s.run() + self.assertIn('3 nodes 
online', out.getvalue()) + s.run(quiet=True) + + +class test_migrate(AppCase): + + @patch('celery.contrib.migrate.migrate_tasks') + def test_run(self, migrate_tasks): + out = WhateverIO() + m = migrate(app=self.app, stdout=out, stderr=WhateverIO()) + with self.assertRaises(TypeError): + m.run() + self.assertFalse(migrate_tasks.called) + + m.run('memory://foo', 'memory://bar') + self.assertTrue(migrate_tasks.called) + + state = Mock() + state.count = 10 + state.strtotal = 30 + m.on_migrate_task(state, {'task': 'tasks.add', 'id': 'ID'}, None) + self.assertIn('10/30', out.getvalue()) + + +class test_report(AppCase): + + def test_run(self): + out = WhateverIO() + r = report(app=self.app, stdout=out) + self.assertEqual(r.run(), EX_OK) + self.assertTrue(out.getvalue()) + + +class test_help(AppCase): + + def test_run(self): + out = WhateverIO() + h = help(app=self.app, stdout=out) + h.parser = Mock() + self.assertEqual(h.run(), EX_USAGE) + self.assertTrue(out.getvalue()) + self.assertTrue(h.usage('help')) + h.parser.print_help.assert_called_with() + + +class test_CeleryCommand(AppCase): + + def test_execute_from_commandline(self): + x = CeleryCommand(app=self.app) + x.handle_argv = Mock() + x.handle_argv.return_value = 1 + with self.assertRaises(SystemExit): + x.execute_from_commandline() + + x.handle_argv.return_value = True + with self.assertRaises(SystemExit): + x.execute_from_commandline() + + x.handle_argv.side_effect = KeyboardInterrupt() + with self.assertRaises(SystemExit): + x.execute_from_commandline() + + x.respects_app_option = True + with self.assertRaises(SystemExit): + x.execute_from_commandline(['celery', 'multi']) + self.assertFalse(x.respects_app_option) + x.respects_app_option = True + with self.assertRaises(SystemExit): + x.execute_from_commandline(['manage.py', 'celery', 'multi']) + self.assertFalse(x.respects_app_option) + + def test_with_pool_option(self): + x = CeleryCommand(app=self.app) + self.assertIsNone(x.with_pool_option(['celery', 'events'])) + self.assertTrue(x.with_pool_option(['celery', 'worker'])) + self.assertTrue(x.with_pool_option(['manage.py', 'celery', 'worker'])) + + def test_load_extensions_no_commands(self): + with patch('celery.bin.celery.Extensions') as Ext: + ext = Ext.return_value = Mock(name='Extension') + ext.load.return_value = None + x = CeleryCommand(app=self.app) + x.load_extension_commands() + + def test_determine_exit_status(self): + self.assertEqual(determine_exit_status('true'), EX_OK) + self.assertEqual(determine_exit_status(''), EX_FAILURE) + + def test_relocate_args_from_start(self): + x = CeleryCommand(app=self.app) + self.assertEqual(x._relocate_args_from_start(None), []) + self.assertEqual( + x._relocate_args_from_start( + ['-l', 'debug', 'worker', '-c', '3', '--foo'], + ), + ['worker', '-c', '3', '--foo', '-l', 'debug'], + ) + self.assertEqual( + x._relocate_args_from_start( + ['--pool=gevent', '-l', 'debug', 'worker', '--foo', '-c', '3'], + ), + ['worker', '--foo', '-c', '3', '--pool=gevent', '-l', 'debug'], + ) + self.assertEqual( + x._relocate_args_from_start(['foo', '--foo=1']), + ['foo', '--foo=1'], + ) + + def test_handle_argv(self): + x = CeleryCommand(app=self.app) + x.execute = Mock() + x.handle_argv('celery', []) + x.execute.assert_called_with('help', ['help']) + + x.handle_argv('celery', ['start', 'foo']) + x.execute.assert_called_with('start', ['start', 'foo']) + + def test_execute(self): + x = CeleryCommand(app=self.app) + Help = x.commands['help'] = Mock() + help = Help.return_value = Mock() + x.execute('fooox', ['a']) 
+ help.run_from_argv.assert_called_with(x.prog_name, [], command='help') + help.reset() + x.execute('help', ['help']) + help.run_from_argv.assert_called_with(x.prog_name, [], command='help') + + Dummy = x.commands['dummy'] = Mock() + dummy = Dummy.return_value = Mock() + exc = dummy.run_from_argv.side_effect = Error( + 'foo', status='EX_FAILURE', + ) + x.on_error = Mock(name='on_error') + help.reset() + x.execute('dummy', ['dummy']) + x.on_error.assert_called_with(exc) + dummy.run_from_argv.assert_called_with( + x.prog_name, [], command='dummy', + ) + help.run_from_argv.assert_called_with( + x.prog_name, [], command='help', + ) + + exc = dummy.run_from_argv.side_effect = x.UsageError('foo') + x.on_usage_error = Mock() + x.execute('dummy', ['dummy']) + x.on_usage_error.assert_called_with(exc) + + def test_on_usage_error(self): + x = CeleryCommand(app=self.app) + x.error = Mock() + x.on_usage_error(x.UsageError('foo'), command=None) + self.assertTrue(x.error.called) + x.on_usage_error(x.UsageError('foo'), command='dummy') + + def test_prepare_prog_name(self): + x = CeleryCommand(app=self.app) + main = Mock(name='__main__') + main.__file__ = '/opt/foo.py' + with patch.dict(sys.modules, __main__=main): + self.assertEqual(x.prepare_prog_name('__main__.py'), '/opt/foo.py') + self.assertEqual(x.prepare_prog_name('celery'), 'celery') + + +class test_RemoteControl(AppCase): + + def test_call_interface(self): + with self.assertRaises(NotImplementedError): + _RemoteControl(app=self.app).call() + + +class test_inspect(AppCase): + + def test_usage(self): + self.assertTrue(inspect(app=self.app).usage('foo')) + + def test_command_info(self): + i = inspect(app=self.app) + self.assertTrue(i.get_command_info( + 'ping', help=True, color=i.colored.red, + )) + + def test_list_commands_color(self): + i = inspect(app=self.app) + self.assertTrue(i.list_commands( + help=True, color=i.colored.red, + )) + self.assertTrue(i.list_commands( + help=False, color=None, + )) + + def test_epilog(self): + self.assertTrue(inspect(app=self.app).epilog) + + def test_do_call_method_sql_transport_type(self): + self.app.connection = Mock() + conn = self.app.connection.return_value = Mock(name='Connection') + conn.transport.driver_type = 'sql' + i = inspect(app=self.app) + with self.assertRaises(i.Error): + i.do_call_method(['ping']) + + def test_say_directions(self): + i = inspect(self.app) + i.out = Mock() + i.quiet = True + i.say_chat('<-', 'hello out') + self.assertFalse(i.out.called) + + i.say_chat('->', 'hello in') + self.assertTrue(i.out.called) + + i.quiet = False + i.out.reset_mock() + i.say_chat('<-', 'hello out', 'body') + self.assertTrue(i.out.called) + + @patch('celery.app.control.Control.inspect') + def test_run(self, real): + out = WhateverIO() + i = inspect(app=self.app, stdout=out) + with self.assertRaises(Error): + i.run() + with self.assertRaises(Error): + i.run('help') + with self.assertRaises(Error): + i.run('xyzzybaz') + + i.run('ping') + self.assertTrue(real.called) + i.run('ping', destination='foo,bar') + self.assertEqual(real.call_args[1]['destination'], ['foo', 'bar']) + self.assertEqual(real.call_args[1]['timeout'], 0.2) + callback = real.call_args[1]['callback'] + + callback({'foo': {'ok': 'pong'}}) + self.assertIn('OK', out.getvalue()) + + instance = real.return_value = Mock() + instance.ping.return_value = None + with self.assertRaises(Error): + i.run('ping') + + out.seek(0) + out.truncate() + i.quiet = True + i.say_chat('<-', 'hello') + self.assertFalse(out.getvalue()) + + +class 
test_control(AppCase): + + def control(self, patch_call, *args, **kwargs): + kwargs.setdefault('app', Mock(name='app')) + c = control(*args, **kwargs) + if patch_call: + c.call = Mock(name='control.call') + return c + + def test_call(self): + i = self.control(False) + i.call('foo', 1, kw=2) + i.app.control.foo.assert_called_with(1, kw=2, reply=True) + + def test_pool_grow(self): + i = self.control(True) + i.pool_grow('pool_grow', n=2) + i.call.assert_called_with('pool_grow', 2) + + def test_pool_shrink(self): + i = self.control(True) + i.pool_shrink('pool_shrink', n=2) + i.call.assert_called_with('pool_shrink', 2) + + def test_autoscale(self): + i = self.control(True) + i.autoscale('autoscale', max=3, min=2) + i.call.assert_called_with('autoscale', 3, 2) + + def test_rate_limit(self): + i = self.control(True) + i.rate_limit('rate_limit', 'proj.add', '1/s') + i.call.assert_called_with('rate_limit', 'proj.add', '1/s') + + def test_time_limit(self): + i = self.control(True) + i.time_limit('time_limit', 'proj.add', 10, 30) + i.call.assert_called_with('time_limit', 'proj.add', 10, 30) + + def test_add_consumer(self): + i = self.control(True) + i.add_consumer( + 'add_consumer', 'queue', 'exchange', 'topic', 'rkey', + durable=True, + ) + i.call.assert_called_with( + 'add_consumer', 'queue', 'exchange', 'topic', 'rkey', + durable=True, + ) + + def test_cancel_consumer(self): + i = self.control(True) + i.cancel_consumer('cancel_consumer', 'queue') + i.call.assert_called_with('cancel_consumer', 'queue') + + +class test_multi(AppCase): + + def test_get_options(self): + self.assertTupleEqual(multi(app=self.app).get_options(), ()) + + def test_run_from_argv(self): + with patch('celery.bin.multi.MultiTool') as MultiTool: + m = MultiTool.return_value = Mock() + multi(self.app).run_from_argv('celery', ['arg'], command='multi') + m.execute_from_commandline.assert_called_with( + ['multi', 'arg'], 'celery', + ) + + +class test_main(AppCase): + + @patch('celery.bin.celery.CeleryCommand') + def test_main(self, Command): + cmd = Command.return_value = Mock() + mainfun() + cmd.execute_from_commandline.assert_called_with(None) + + @patch('celery.bin.celery.CeleryCommand') + def test_main_KeyboardInterrupt(self, Command): + cmd = Command.return_value = Mock() + cmd.execute_from_commandline.side_effect = KeyboardInterrupt() + mainfun() + cmd.execute_from_commandline.assert_called_with(None) + + +class test_compat(AppCase): + + def test_compat_command_decorator(self): + with patch('celery.bin.celery.CeleryCommand') as CC: + self.assertEqual(command(), CC.register_command) + fun = Mock(name='fun') + command(fun) + CC.register_command.assert_called_with(fun) diff --git a/celery/tests/bin/test_celeryd_detach.py b/celery/tests/bin/test_celeryd_detach.py new file mode 100644 index 0000000..a3675e2 --- /dev/null +++ b/celery/tests/bin/test_celeryd_detach.py @@ -0,0 +1,101 @@ +from __future__ import absolute_import + +from celery.platforms import IS_WINDOWS +from celery.bin.celeryd_detach import ( + detach, + detached_celeryd, + main, +) + +from celery.tests.case import AppCase, Mock, override_stdouts, patch + + +if not IS_WINDOWS: + class test_detached(AppCase): + + @patch('celery.bin.celeryd_detach.detached') + @patch('os.execv') + @patch('celery.bin.celeryd_detach.logger') + @patch('celery.app.log.Logging.setup_logging_subsystem') + def test_execs(self, setup_logs, logger, execv, detached): + context = detached.return_value = Mock() + context.__enter__ = Mock() + context.__exit__ = Mock() + + detach('/bin/boo', ['a', 
'b', 'c'], logfile='/var/log', + pidfile='/var/pid') + detached.assert_called_with('/var/log', '/var/pid', None, None, + None, None, False) + execv.assert_called_with('/bin/boo', ['/bin/boo', 'a', 'b', 'c']) + + execv.side_effect = Exception('foo') + r = detach('/bin/boo', ['a', 'b', 'c'], + logfile='/var/log', pidfile='/var/pid', app=self.app) + context.__enter__.assert_called_with() + self.assertTrue(logger.critical.called) + setup_logs.assert_called_with('ERROR', '/var/log') + self.assertEqual(r, 1) + + +class test_PartialOptionParser(AppCase): + + def test_parser(self): + x = detached_celeryd(self.app) + p = x.Parser('celeryd_detach') + options, values = p.parse_args(['--logfile=foo', '--fake', '--enable', + 'a', 'b', '-c1', '-d', '2']) + self.assertEqual(options.logfile, 'foo') + self.assertEqual(values, ['a', 'b']) + self.assertEqual(p.leftovers, ['--enable', '-c1', '-d', '2']) + + with override_stdouts(): + with self.assertRaises(SystemExit): + p.parse_args(['--logfile']) + p.get_option('--logfile').nargs = 2 + with self.assertRaises(SystemExit): + p.parse_args(['--logfile=a']) + with self.assertRaises(SystemExit): + p.parse_args(['--fake=abc']) + + assert p.get_option('--logfile').nargs == 2 + p.parse_args(['--logfile=a', 'b']) + p.get_option('--logfile').nargs = 1 + + +class test_Command(AppCase): + argv = ['--autoscale=10,2', '-c', '1', + '--logfile=/var/log', '-lDEBUG', + '--', '.disable_rate_limits=1'] + + def test_parse_options(self): + x = detached_celeryd(app=self.app) + o, v, l = x.parse_options('cd', self.argv) + self.assertEqual(o.logfile, '/var/log') + self.assertEqual(l, ['--autoscale=10,2', '-c', '1', + '-lDEBUG', '--logfile=/var/log', + '--pidfile=celeryd.pid']) + x.parse_options('cd', []) # no args + + @patch('sys.exit') + @patch('celery.bin.celeryd_detach.detach') + def test_execute_from_commandline(self, detach, exit): + x = detached_celeryd(app=self.app) + x.execute_from_commandline(self.argv) + self.assertTrue(exit.called) + detach.assert_called_with( + path=x.execv_path, uid=None, gid=None, + umask=None, fake=False, logfile='/var/log', pidfile='celeryd.pid', + working_directory=None, + argv=x.execv_argv + [ + '-c', '1', '-lDEBUG', + '--logfile=/var/log', '--pidfile=celeryd.pid', + '--', '.disable_rate_limits=1' + ], + app=self.app, + ) + + @patch('celery.bin.celeryd_detach.detached_celeryd') + def test_main(self, command): + c = command.return_value = Mock() + main(self.app) + c.execute_from_commandline.assert_called_with() diff --git a/celery/tests/bin/test_celeryevdump.py b/celery/tests/bin/test_celeryevdump.py new file mode 100644 index 0000000..09cdc4d --- /dev/null +++ b/celery/tests/bin/test_celeryevdump.py @@ -0,0 +1,68 @@ +from __future__ import absolute_import + +from time import time + +from celery.events.dumper import ( + humanize_type, + Dumper, + evdump, +) + +from celery.tests.case import AppCase, Mock, WhateverIO, patch + + +class test_Dumper(AppCase): + + def setup(self): + self.out = WhateverIO() + self.dumper = Dumper(out=self.out) + + def test_humanize_type(self): + self.assertEqual(humanize_type('worker-offline'), 'shutdown') + self.assertEqual(humanize_type('task-started'), 'task started') + + def test_format_task_event(self): + self.dumper.format_task_event( + 'worker@example.com', time(), 'task-started', 'tasks.add', {}) + self.assertTrue(self.out.getvalue()) + + def test_on_event(self): + event = { + 'hostname': 'worker@example.com', + 'timestamp': time(), + 'uuid': '1ef', + 'name': 'tasks.add', + 'args': '(2, 2)', + 'kwargs': '{}', + } + 
self.dumper.on_event(dict(event, type='task-received')) + self.assertTrue(self.out.getvalue()) + self.dumper.on_event(dict(event, type='task-revoked')) + self.dumper.on_event(dict(event, type='worker-online')) + + @patch('celery.events.EventReceiver.capture') + def test_evdump(self, capture): + capture.side_effect = KeyboardInterrupt() + evdump(app=self.app) + + def test_evdump_error_handler(self): + app = Mock(name='app') + with patch('celery.events.dumper.Dumper') as Dumper: + Dumper.return_value = Mock(name='dumper') + recv = app.events.Receiver.return_value = Mock() + + def se(*_a, **_k): + recv.capture.side_effect = SystemExit() + raise KeyError() + recv.capture.side_effect = se + + Conn = app.connection.return_value = Mock(name='conn') + conn = Conn.clone.return_value = Mock(name='cloned_conn') + conn.connection_errors = (KeyError, ) + conn.channel_errors = () + + evdump(app) + self.assertTrue(conn.ensure_connection.called) + errback = conn.ensure_connection.call_args[0][0] + errback(KeyError(), 1) + self.assertTrue(conn.as_uri.called) diff --git a/celery/tests/bin/test_events.py b/celery/tests/bin/test_events.py new file mode 100644 index 0000000..a6e79f7 --- /dev/null +++ b/celery/tests/bin/test_events.py @@ -0,0 +1,73 @@ +from __future__ import absolute_import + +from celery.bin import events + +from celery.tests.case import AppCase, SkipTest, patch, _old_patch + + +class MockCommand(object): + executed = [] + + def execute_from_commandline(self, **kwargs): + self.executed.append(True) + + +def proctitle(prog, info=None): + proctitle.last = (prog, info) +proctitle.last = () + + +class test_events(AppCase): + + def setup(self): + self.ev = events.events(app=self.app) + + @_old_patch('celery.events.dumper', 'evdump', + lambda **kw: 'me dumper, you?') + @_old_patch('celery.bin.events', 'set_process_title', proctitle) + def test_run_dump(self): + self.assertEqual(self.ev.run(dump=True), 'me dumper, you?') + self.assertIn('celery events:dump', proctitle.last[0]) + + def test_run_top(self): + try: + import curses # noqa + except ImportError: + raise SkipTest('curses monitor requires curses') + + @_old_patch('celery.events.cursesmon', 'evtop', + lambda **kw: 'me top, you?') + @_old_patch('celery.bin.events', 'set_process_title', proctitle) + def _inner(): + self.assertEqual(self.ev.run(), 'me top, you?') + self.assertIn('celery events:top', proctitle.last[0]) + return _inner() + + @_old_patch('celery.events.snapshot', 'evcam', + lambda *a, **k: (a, k)) + @_old_patch('celery.bin.events', 'set_process_title', proctitle) + def test_run_cam(self): + a, kw = self.ev.run(camera='foo.bar.baz', logfile='logfile') + self.assertEqual(a[0], 'foo.bar.baz') + self.assertEqual(kw['freq'], 1.0) + self.assertIsNone(kw['maxrate']) + self.assertEqual(kw['loglevel'], 'INFO') + self.assertEqual(kw['logfile'], 'logfile') + self.assertIn('celery events:cam', proctitle.last[0]) + + @patch('celery.events.snapshot.evcam') + @patch('celery.bin.events.detached') + def test_run_cam_detached(self, detached, evcam): + self.ev.prog_name = 'celery events' + self.ev.run_evcam('myapp.Camera', detach=True) + self.assertTrue(detached.called) + self.assertTrue(evcam.called) + + def test_get_options(self): + self.assertTrue(self.ev.get_options()) + + @_old_patch('celery.bin.events', 'events', MockCommand) + def test_main(self): + MockCommand.executed = [] + events.main() + self.assertTrue(MockCommand.executed) diff --git a/celery/tests/bin/test_multi.py b/celery/tests/bin/test_multi.py new file mode 100644 index 
0000000..0b2ecd9 --- /dev/null +++ b/celery/tests/bin/test_multi.py @@ -0,0 +1,474 @@ +from __future__ import absolute_import + +import errno +import signal +import sys + +from celery.bin.multi import ( + main, + MultiTool, + findsig, + abbreviations, + parse_ns_range, + format_opt, + quote, + NamespacedOptionParser, + multi_args, + __doc__ as doc, +) + +from celery.tests.case import AppCase, Mock, WhateverIO, SkipTest, patch + + +class test_functions(AppCase): + + def test_findsig(self): + self.assertEqual(findsig(['a', 'b', 'c', '-1']), 1) + self.assertEqual(findsig(['--foo=1', '-9']), 9) + self.assertEqual(findsig(['-INT']), signal.SIGINT) + self.assertEqual(findsig([]), signal.SIGTERM) + self.assertEqual(findsig(['-s']), signal.SIGTERM) + self.assertEqual(findsig(['-log']), signal.SIGTERM) + + def test_abbreviations(self): + expander = abbreviations({'%s': 'START', + '%x': 'STOP'}) + self.assertEqual(expander('foo%s'), 'fooSTART') + self.assertEqual(expander('foo%x'), 'fooSTOP') + self.assertEqual(expander('foo%y'), 'foo%y') + self.assertIsNone(expander(None)) + + def test_parse_ns_range(self): + self.assertEqual(parse_ns_range('1-3', True), ['1', '2', '3']) + self.assertEqual(parse_ns_range('1-3', False), ['1-3']) + self.assertEqual(parse_ns_range( + '1-3,10,11,20', True), + ['1', '2', '3', '10', '11', '20'], + ) + + def test_format_opt(self): + self.assertEqual(format_opt('--foo', None), '--foo') + self.assertEqual(format_opt('-c', 1), '-c 1') + self.assertEqual(format_opt('--log', 'foo'), '--log=foo') + + def test_quote(self): + self.assertEqual(quote("the 'quick"), "'the '\\''quick'") + + +class test_NamespacedOptionParser(AppCase): + + def test_parse(self): + x = NamespacedOptionParser(['-c:1,3', '4']) + self.assertEqual(x.namespaces.get('1,3'), {'-c': '4'}) + x = NamespacedOptionParser(['-c:jerry,elaine', '5', + '--loglevel:kramer=DEBUG', + '--flag', + '--logfile=foo', '-Q', 'bar', 'a', 'b', + '--', '.disable_rate_limits=1']) + self.assertEqual(x.options, {'--logfile': 'foo', + '-Q': 'bar', + '--flag': None}) + self.assertEqual(x.values, ['a', 'b']) + self.assertEqual(x.namespaces.get('jerry,elaine'), {'-c': '5'}) + self.assertEqual(x.namespaces.get('kramer'), {'--loglevel': 'DEBUG'}) + self.assertEqual(x.passthrough, '-- .disable_rate_limits=1') + + +class test_multi_args(AppCase): + + @patch('socket.gethostname') + def test_parse(self, gethostname): + p = NamespacedOptionParser([ + '-c:jerry,elaine', '5', + '--loglevel:kramer=DEBUG', + '--flag', + '--logfile=foo', '-Q', 'bar', 'jerry', + 'elaine', 'kramer', + '--', '.disable_rate_limits=1', + ]) + it = multi_args(p, cmd='COMMAND', append='*AP*', + prefix='*P*', suffix='*S*') + names = list(it) + + def assert_line_in(name, args): + self.assertIn(name, [tup[0] for tup in names]) + argv = None + for item in names: + if item[0] == name: + argv = item[1] + self.assertTrue(argv) + for arg in args: + self.assertIn(arg, argv) + + assert_line_in( + '*P*jerry@*S*', + ['COMMAND', '-n *P*jerry@*S*', '-Q bar', + '-c 5', '--flag', '--logfile=foo', + '-- .disable_rate_limits=1', '*AP*'], + ) + assert_line_in( + '*P*elaine@*S*', + ['COMMAND', '-n *P*elaine@*S*', '-Q bar', + '-c 5', '--flag', '--logfile=foo', + '-- .disable_rate_limits=1', '*AP*'], + ) + assert_line_in( + '*P*kramer@*S*', + ['COMMAND', '--loglevel=DEBUG', '-n *P*kramer@*S*', + '-Q bar', '--flag', '--logfile=foo', + '-- .disable_rate_limits=1', '*AP*'], + ) + expand = names[0][2] + self.assertEqual(expand('%h'), '*P*jerry@*S*') + self.assertEqual(expand('%n'), 'jerry') + names2 
= list(multi_args(p, cmd='COMMAND', append='', + prefix='*P*', suffix='*S*')) + self.assertEqual(names2[0][1][-1], '-- .disable_rate_limits=1') + + gethostname.return_value = 'example.com' + p2 = NamespacedOptionParser(['10', '-c:1', '5']) + names3 = list(multi_args(p2, cmd='COMMAND')) + self.assertEqual(len(names3), 10) + self.assertEqual( + names3[0][0:2], + ('celery1@example.com', + ['COMMAND', '-n celery1@example.com', '-c 5', '']), + ) + for i, worker in enumerate(names3[1:]): + self.assertEqual( + worker[0:2], + ('celery%s@example.com' % (i + 2), + ['COMMAND', '-n celery%s@example.com' % (i + 2), '']), + ) + + names4 = list(multi_args(p2, cmd='COMMAND', suffix='""')) + self.assertEqual(len(names4), 10) + self.assertEqual( + names4[0][0:2], + ('celery1@', + ['COMMAND', '-n celery1@', '-c 5', '']), + ) + + p3 = NamespacedOptionParser(['foo@', '-c:foo', '5']) + names5 = list(multi_args(p3, cmd='COMMAND', suffix='""')) + self.assertEqual( + names5[0][0:2], + ('foo@', + ['COMMAND', '-n foo@', '-c 5', '']), + ) + + +class test_MultiTool(AppCase): + + def setup(self): + self.fh = WhateverIO() + self.env = {} + self.t = MultiTool(env=self.env, fh=self.fh) + + def test_note(self): + self.t.note('hello world') + self.assertEqual(self.fh.getvalue(), 'hello world\n') + + def test_note_quiet(self): + self.t.quiet = True + self.t.note('hello world') + self.assertFalse(self.fh.getvalue()) + + def test_info(self): + self.t.verbose = True + self.t.info('hello info') + self.assertEqual(self.fh.getvalue(), 'hello info\n') + + def test_info_not_verbose(self): + self.t.verbose = False + self.t.info('hello info') + self.assertFalse(self.fh.getvalue()) + + def test_error(self): + self.t.say = Mock() + self.t.usage = Mock() + self.assertEqual(self.t.error('foo'), 1) + self.t.say.assert_called_with('foo') + self.t.usage.assert_called_with() + + self.t.say = Mock() + self.assertEqual(self.t.error(), 1) + self.assertFalse(self.t.say.called) + + self.assertEqual(self.t.retcode, 1) + + @patch('celery.bin.multi.Popen') + def test_waitexec(self, Popen): + self.t.note = Mock() + pipe = Popen.return_value = Mock() + pipe.wait.return_value = -10 + self.assertEqual(self.t.waitexec(['-m', 'foo'], 'path'), 10) + Popen.assert_called_with(['path', '-m', 'foo'], env=self.t.env) + self.t.note.assert_called_with('* Child was terminated by signal 10') + + pipe.wait.return_value = 2 + self.assertEqual(self.t.waitexec(['-m', 'foo'], 'path'), 2) + self.t.note.assert_called_with( + '* Child terminated with errorcode 2', + ) + + pipe.wait.return_value = 0 + self.assertFalse(self.t.waitexec(['-m', 'foo', 'path'])) + + def test_nosplash(self): + self.t.nosplash = True + self.t.splash() + self.assertFalse(self.fh.getvalue()) + + def test_splash(self): + self.t.nosplash = False + self.t.splash() + self.assertIn('celery multi', self.fh.getvalue()) + + def test_usage(self): + self.t.usage() + self.assertTrue(self.fh.getvalue()) + + def test_help(self): + self.t.help([]) + self.assertIn(doc, self.fh.getvalue()) + + def test_expand(self): + self.t.expand(['foo%n', 'ask', 'klask', 'dask']) + self.assertEqual( + self.fh.getvalue(), 'fooask\nfooklask\nfoodask\n', + ) + + def test_restart(self): + stop = self.t._stop_nodes = Mock() + self.t.restart(['jerry', 'george'], 'celery worker') + waitexec = self.t.waitexec = Mock() + self.assertTrue(stop.called) + callback = stop.call_args[1]['callback'] + self.assertTrue(callback) + + waitexec.return_value = 0 + callback('jerry', ['arg'], 13) + waitexec.assert_called_with(['arg']) + 
self.assertIn('OK', self.fh.getvalue()) + self.fh.seek(0) + self.fh.truncate() + + waitexec.return_value = 1 + callback('jerry', ['arg'], 13) + self.assertIn('FAILED', self.fh.getvalue()) + + def test_stop(self): + self.t.getpids = Mock() + self.t.getpids.return_value = [2, 3, 4] + self.t.shutdown_nodes = Mock() + self.t.stop(['a', 'b', '-INT'], 'celery worker') + self.t.shutdown_nodes.assert_called_with( + [2, 3, 4], sig=signal.SIGINT, retry=None, callback=None, + + ) + + def test_kill(self): + if not hasattr(signal, 'SIGKILL'): + raise SkipTest('SIGKILL not supported by this platform') + self.t.getpids = Mock() + self.t.getpids.return_value = [ + ('a', None, 10), + ('b', None, 11), + ('c', None, 12) + ] + sig = self.t.signal_node = Mock() + + self.t.kill(['a', 'b', 'c'], 'celery worker') + + sigs = sig.call_args_list + self.assertEqual(len(sigs), 3) + self.assertEqual(sigs[0][0], ('a', 10, signal.SIGKILL)) + self.assertEqual(sigs[1][0], ('b', 11, signal.SIGKILL)) + self.assertEqual(sigs[2][0], ('c', 12, signal.SIGKILL)) + + def prepare_pidfile_for_getpids(self, Pidfile): + class pids(object): + + def __init__(self, path): + self.path = path + + def read_pid(self): + try: + return {'foo.pid': 10, + 'bar.pid': 11}[self.path] + except KeyError: + raise ValueError() + Pidfile.side_effect = pids + + @patch('celery.bin.multi.Pidfile') + @patch('socket.gethostname') + def test_getpids(self, gethostname, Pidfile): + gethostname.return_value = 'e.com' + self.prepare_pidfile_for_getpids(Pidfile) + callback = Mock() + + p = NamespacedOptionParser(['foo', 'bar', 'baz']) + nodes = self.t.getpids(p, 'celery worker', callback=callback) + node_0, node_1 = nodes + self.assertEqual(node_0[0], 'foo@e.com') + self.assertEqual( + sorted(node_0[1]), + sorted(('celery worker', '--pidfile=foo.pid', + '-n foo@e.com', '')), + ) + self.assertEqual(node_0[2], 10) + + self.assertEqual(node_1[0], 'bar@e.com') + self.assertEqual( + sorted(node_1[1]), + sorted(('celery worker', '--pidfile=bar.pid', + '-n bar@e.com', '')), + ) + self.assertEqual(node_1[2], 11) + self.assertTrue(callback.called) + cargs, _ = callback.call_args + self.assertEqual(cargs[0], 'baz@e.com') + self.assertItemsEqual( + cargs[1], + ['celery worker', '--pidfile=baz.pid', '-n baz@e.com', ''], + ) + self.assertIsNone(cargs[2]) + self.assertIn('DOWN', self.fh.getvalue()) + + # without callback, should work + nodes = self.t.getpids(p, 'celery worker', callback=None) + + @patch('celery.bin.multi.Pidfile') + @patch('socket.gethostname') + @patch('celery.bin.multi.sleep') + def test_shutdown_nodes(self, slepp, gethostname, Pidfile): + gethostname.return_value = 'e.com' + self.prepare_pidfile_for_getpids(Pidfile) + self.assertIsNone(self.t.shutdown_nodes([])) + self.t.signal_node = Mock() + node_alive = self.t.node_alive = Mock() + self.t.node_alive.return_value = False + + callback = Mock() + self.t.stop(['foo', 'bar', 'baz'], 'celery worker', callback=callback) + sigs = sorted(self.t.signal_node.call_args_list) + self.assertEqual(len(sigs), 2) + self.assertIn( + ('foo@e.com', 10, signal.SIGTERM), + [tup[0] for tup in sigs], + ) + self.assertIn( + ('bar@e.com', 11, signal.SIGTERM), + [tup[0] for tup in sigs], + ) + self.t.signal_node.return_value = False + self.assertTrue(callback.called) + self.t.stop(['foo', 'bar', 'baz'], 'celery worker', callback=None) + + def on_node_alive(pid): + if node_alive.call_count > 4: + return True + return False + self.t.signal_node.return_value = True + self.t.node_alive.side_effect = on_node_alive + self.t.stop(['foo', 
'bar', 'baz'], 'celery worker', retry=True) + + @patch('os.kill') + def test_node_alive(self, kill): + kill.return_value = True + self.assertTrue(self.t.node_alive(13)) + esrch = OSError() + esrch.errno = errno.ESRCH + kill.side_effect = esrch + self.assertFalse(self.t.node_alive(13)) + kill.assert_called_with(13, 0) + + enoent = OSError() + enoent.errno = errno.ENOENT + kill.side_effect = enoent + with self.assertRaises(OSError): + self.t.node_alive(13) + + @patch('os.kill') + def test_signal_node(self, kill): + kill.return_value = True + self.assertTrue(self.t.signal_node('foo', 13, 9)) + esrch = OSError() + esrch.errno = errno.ESRCH + kill.side_effect = esrch + self.assertFalse(self.t.signal_node('foo', 13, 9)) + kill.assert_called_with(13, 9) + self.assertIn('Could not signal foo', self.fh.getvalue()) + + enoent = OSError() + enoent.errno = errno.ENOENT + kill.side_effect = enoent + with self.assertRaises(OSError): + self.t.signal_node('foo', 13, 9) + + def test_start(self): + self.t.waitexec = Mock() + self.t.waitexec.return_value = 0 + self.assertFalse(self.t.start(['foo', 'bar', 'baz'], 'celery worker')) + + self.t.waitexec.return_value = 1 + self.assertFalse(self.t.start(['foo', 'bar', 'baz'], 'celery worker')) + + def test_show(self): + self.t.show(['foo', 'bar', 'baz'], 'celery worker') + self.assertTrue(self.fh.getvalue()) + + @patch('socket.gethostname') + def test_get(self, gethostname): + gethostname.return_value = 'e.com' + self.t.get(['xuzzy@e.com', 'foo', 'bar', 'baz'], 'celery worker') + self.assertFalse(self.fh.getvalue()) + self.t.get(['foo@e.com', 'foo', 'bar', 'baz'], 'celery worker') + self.assertTrue(self.fh.getvalue()) + + @patch('socket.gethostname') + def test_names(self, gethostname): + gethostname.return_value = 'e.com' + self.t.names(['foo', 'bar', 'baz'], 'celery worker') + self.assertIn('foo@e.com\nbar@e.com\nbaz@e.com', self.fh.getvalue()) + + def test_execute_from_commandline(self): + start = self.t.commands['start'] = Mock() + self.t.error = Mock() + self.t.execute_from_commandline(['multi', 'start', 'foo', 'bar']) + self.assertFalse(self.t.error.called) + start.assert_called_with(['foo', 'bar'], 'celery worker') + + self.t.error = Mock() + self.t.execute_from_commandline(['multi', 'frob', 'foo', 'bar']) + self.t.error.assert_called_with('Invalid command: frob') + + self.t.error = Mock() + self.t.execute_from_commandline(['multi']) + self.t.error.assert_called_with() + + self.t.error = Mock() + self.t.execute_from_commandline(['multi', '-foo']) + self.t.error.assert_called_with() + + self.t.execute_from_commandline( + ['multi', 'start', 'foo', + '--nosplash', '--quiet', '-q', '--verbose', '--no-color'], + ) + self.assertTrue(self.t.nosplash) + self.assertTrue(self.t.quiet) + self.assertTrue(self.t.verbose) + self.assertTrue(self.t.no_color) + + def test_stopwait(self): + self.t._stop_nodes = Mock() + self.t.stopwait(['foo', 'bar', 'baz'], 'celery worker') + self.assertEqual(self.t._stop_nodes.call_args[1]['retry'], 2) + + @patch('celery.bin.multi.MultiTool') + def test_main(self, MultiTool): + m = MultiTool.return_value = Mock() + with self.assertRaises(SystemExit): + main() + m.execute_from_commandline.assert_called_with(sys.argv) diff --git a/celery/tests/bin/test_worker.py b/celery/tests/bin/test_worker.py new file mode 100644 index 0000000..cd8a91b --- /dev/null +++ b/celery/tests/bin/test_worker.py @@ -0,0 +1,702 @@ +from __future__ import absolute_import + +import logging +import os +import sys + +from functools import wraps + +from billiard import 
current_process +from kombu import Exchange, Queue + +from celery import platforms +from celery import signals +from celery.app import trace +from celery.apps import worker as cd +from celery.bin.worker import worker, main as worker_main +from celery.exceptions import ( + ImproperlyConfigured, WorkerShutdown, WorkerTerminate, +) +from celery.utils.log import ensure_process_aware_logger +from celery.worker import state + +from celery.tests.case import ( + AppCase, + Mock, + SkipTest, + WhateverIO, + patch, + skip_if_pypy, + skip_if_jython, +) + +ensure_process_aware_logger() + + +class WorkerAppCase(AppCase): + + def tearDown(self): + super(WorkerAppCase, self).tearDown() + trace.reset_worker_optimizations() + + +def disable_stdouts(fun): + + @wraps(fun) + def disable(*args, **kwargs): + prev_out, prev_err = sys.stdout, sys.stderr + prev_rout, prev_rerr = sys.__stdout__, sys.__stderr__ + sys.stdout = sys.__stdout__ = WhateverIO() + sys.stderr = sys.__stderr__ = WhateverIO() + try: + return fun(*args, **kwargs) + finally: + sys.stdout = prev_out + sys.stderr = prev_err + sys.__stdout__ = prev_rout + sys.__stderr__ = prev_rerr + + return disable + + +class Worker(cd.Worker): + redirect_stdouts = False + + def start(self, *args, **kwargs): + self.on_start() + + +class test_Worker(WorkerAppCase): + Worker = Worker + + @disable_stdouts + def test_queues_string(self): + w = self.app.Worker() + w.setup_queues('foo,bar,baz') + self.assertTrue('foo' in self.app.amqp.queues) + + @disable_stdouts + def test_cpu_count(self): + with patch('celery.worker.cpu_count') as cpu_count: + cpu_count.side_effect = NotImplementedError() + w = self.app.Worker(concurrency=None) + self.assertEqual(w.concurrency, 2) + w = self.app.Worker(concurrency=5) + self.assertEqual(w.concurrency, 5) + + @disable_stdouts + def test_windows_B_option(self): + self.app.IS_WINDOWS = True + with self.assertRaises(SystemExit): + worker(app=self.app).run(beat=True) + + def test_setup_concurrency_very_early(self): + x = worker() + x.run = Mock() + with self.assertRaises(ImportError): + x.execute_from_commandline(['worker', '-P', 'xyzybox']) + + def test_run_from_argv_basic(self): + x = worker(app=self.app) + x.run = Mock() + x.maybe_detach = Mock() + + def run(*args, **kwargs): + pass + x.run = run + x.run_from_argv('celery', []) + self.assertTrue(x.maybe_detach.called) + + def test_maybe_detach(self): + x = worker(app=self.app) + with patch('celery.bin.worker.detached_celeryd') as detached: + x.maybe_detach([]) + self.assertFalse(detached.called) + with self.assertRaises(SystemExit): + x.maybe_detach(['--detach']) + self.assertTrue(detached.called) + + @disable_stdouts + def test_invalid_loglevel_gives_error(self): + x = worker(app=self.app) + with self.assertRaises(SystemExit): + x.run(loglevel='GRIM_REAPER') + + def test_no_loglevel(self): + self.app.Worker = Mock() + worker(app=self.app).run(loglevel=None) + + def test_tasklist(self): + worker = self.app.Worker() + self.assertTrue(worker.app.tasks) + self.assertTrue(worker.app.finalized) + self.assertTrue(worker.tasklist(include_builtins=True)) + worker.tasklist(include_builtins=False) + + def test_extra_info(self): + worker = self.app.Worker() + worker.loglevel = logging.WARNING + self.assertFalse(worker.extra_info()) + worker.loglevel = logging.INFO + self.assertTrue(worker.extra_info()) + + @disable_stdouts + def test_loglevel_string(self): + worker = self.Worker(app=self.app, loglevel='INFO') + self.assertEqual(worker.loglevel, logging.INFO) + + @disable_stdouts + def 
test_run_worker(self): + handlers = {} + + class Signals(platforms.Signals): + + def __setitem__(self, sig, handler): + handlers[sig] = handler + + p = platforms.signals + platforms.signals = Signals() + try: + w = self.Worker(app=self.app) + w._isatty = False + w.on_start() + for sig in 'SIGINT', 'SIGHUP', 'SIGTERM': + self.assertIn(sig, handlers) + + handlers.clear() + w = self.Worker(app=self.app) + w._isatty = True + w.on_start() + for sig in 'SIGINT', 'SIGTERM': + self.assertIn(sig, handlers) + self.assertNotIn('SIGHUP', handlers) + finally: + platforms.signals = p + + @disable_stdouts + def test_startup_info(self): + worker = self.Worker(app=self.app) + worker.on_start() + self.assertTrue(worker.startup_info()) + worker.loglevel = logging.DEBUG + self.assertTrue(worker.startup_info()) + worker.loglevel = logging.INFO + self.assertTrue(worker.startup_info()) + worker.autoscale = 13, 10 + self.assertTrue(worker.startup_info()) + + prev_loader = self.app.loader + worker = self.Worker(app=self.app, queues='foo,bar,baz,xuzzy,do,re,mi') + self.app.loader = Mock() + self.app.loader.__module__ = 'acme.baked_beans' + self.assertTrue(worker.startup_info()) + + self.app.loader = Mock() + self.app.loader.__module__ = 'celery.loaders.foo' + self.assertTrue(worker.startup_info()) + + from celery.loaders.app import AppLoader + self.app.loader = AppLoader(app=self.app) + self.assertTrue(worker.startup_info()) + + self.app.loader = prev_loader + worker.send_events = True + self.assertTrue(worker.startup_info()) + + # test when there are too few output lines + # to draft the ascii art onto + prev, cd.ARTLINES = cd.ARTLINES, ['the quick brown fox'] + try: + self.assertTrue(worker.startup_info()) + finally: + cd.ARTLINES = prev + + @disable_stdouts + def test_run(self): + self.Worker(app=self.app).on_start() + self.Worker(app=self.app, purge=True).on_start() + worker = self.Worker(app=self.app) + worker.on_start() + + @disable_stdouts + def test_purge_messages(self): + self.Worker(app=self.app).purge_messages() + + @disable_stdouts + def test_init_queues(self): + app = self.app + c = app.conf + app.amqp.queues = app.amqp.Queues({ + 'celery': {'exchange': 'celery', + 'routing_key': 'celery'}, + 'video': {'exchange': 'video', + 'routing_key': 'video'}, + }) + worker = self.Worker(app=self.app) + worker.setup_queues(['video']) + self.assertIn('video', app.amqp.queues) + self.assertIn('video', app.amqp.queues.consume_from) + self.assertIn('celery', app.amqp.queues) + self.assertNotIn('celery', app.amqp.queues.consume_from) + + c.CELERY_CREATE_MISSING_QUEUES = False + del(app.amqp.queues) + with self.assertRaises(ImproperlyConfigured): + self.Worker(app=self.app).setup_queues(['image']) + del(app.amqp.queues) + c.CELERY_CREATE_MISSING_QUEUES = True + worker = self.Worker(app=self.app) + worker.setup_queues(['image']) + self.assertIn('image', app.amqp.queues.consume_from) + self.assertEqual( + Queue('image', Exchange('image'), routing_key='image'), + app.amqp.queues['image'], + ) + + @disable_stdouts + def test_autoscale_argument(self): + worker1 = self.Worker(app=self.app, autoscale='10,3') + self.assertListEqual(worker1.autoscale, [10, 3]) + worker2 = self.Worker(app=self.app, autoscale='10') + self.assertListEqual(worker2.autoscale, [10, 0]) + self.assert_no_logging_side_effect() + + def test_include_argument(self): + worker1 = self.Worker(app=self.app, include='os') + self.assertListEqual(worker1.include, ['os']) + worker2 = self.Worker(app=self.app, + include='os,sys') + 
self.assertListEqual(worker2.include, ['os', 'sys']) + self.Worker(app=self.app, include=['os', 'sys']) + + @disable_stdouts + def test_unknown_loglevel(self): + with self.assertRaises(SystemExit): + worker(app=self.app).run(loglevel='ALIEN') + worker1 = self.Worker(app=self.app, loglevel=0xFFFF) + self.assertEqual(worker1.loglevel, 0xFFFF) + + @disable_stdouts + @patch('os._exit') + def test_warns_if_running_as_privileged_user(self, _exit): + app = self.app + if app.IS_WINDOWS: + raise SkipTest('Not applicable on Windows') + + with patch('os.getuid') as getuid: + getuid.return_value = 0 + self.app.conf.CELERY_ACCEPT_CONTENT = ['pickle'] + worker = self.Worker(app=self.app) + worker.on_start() + _exit.assert_called_with(1) + from celery import platforms + platforms.C_FORCE_ROOT = True + try: + with self.assertWarnsRegex( + RuntimeWarning, + r'absolutely not recommended'): + worker = self.Worker(app=self.app) + worker.on_start() + finally: + platforms.C_FORCE_ROOT = False + self.app.conf.CELERY_ACCEPT_CONTENT = ['json'] + with self.assertWarnsRegex( + RuntimeWarning, + r'absolutely not recommended'): + worker = self.Worker(app=self.app) + worker.on_start() + + @disable_stdouts + def test_redirect_stdouts(self): + self.Worker(app=self.app, redirect_stdouts=False) + with self.assertRaises(AttributeError): + sys.stdout.logger + + @disable_stdouts + def test_on_start_custom_logging(self): + self.app.log.redirect_stdouts = Mock() + worker = self.Worker(app=self.app, redirect_stoutds=True) + worker._custom_logging = True + worker.on_start() + self.assertFalse(self.app.log.redirect_stdouts.called) + + def test_setup_logging_no_color(self): + worker = self.Worker( + app=self.app, redirect_stdouts=False, no_color=True, + ) + prev, self.app.log.setup = self.app.log.setup, Mock() + try: + worker.setup_logging() + self.assertFalse(self.app.log.setup.call_args[1]['colorize']) + finally: + self.app.log.setup = prev + + @disable_stdouts + def test_startup_info_pool_is_str(self): + worker = self.Worker(app=self.app, redirect_stdouts=False) + worker.pool_cls = 'foo' + worker.startup_info() + + def test_redirect_stdouts_already_handled(self): + logging_setup = [False] + + @signals.setup_logging.connect + def on_logging_setup(**kwargs): + logging_setup[0] = True + + try: + worker = self.Worker(app=self.app, redirect_stdouts=False) + worker.app.log.already_setup = False + worker.setup_logging() + self.assertTrue(logging_setup[0]) + with self.assertRaises(AttributeError): + sys.stdout.logger + finally: + signals.setup_logging.disconnect(on_logging_setup) + + @disable_stdouts + def test_platform_tweaks_osx(self): + + class OSXWorker(Worker): + proxy_workaround_installed = False + + def osx_proxy_detection_workaround(self): + self.proxy_workaround_installed = True + + worker = OSXWorker(app=self.app, redirect_stdouts=False) + + def install_HUP_nosupport(controller): + controller.hup_not_supported_installed = True + + class Controller(object): + pass + + prev = cd.install_HUP_not_supported_handler + cd.install_HUP_not_supported_handler = install_HUP_nosupport + try: + worker.app.IS_OSX = True + controller = Controller() + worker.install_platform_tweaks(controller) + self.assertTrue(controller.hup_not_supported_installed) + self.assertTrue(worker.proxy_workaround_installed) + finally: + cd.install_HUP_not_supported_handler = prev + + @disable_stdouts + def test_general_platform_tweaks(self): + + restart_worker_handler_installed = [False] + + def install_worker_restart_handler(worker): + 
restart_worker_handler_installed[0] = True + + class Controller(object): + pass + + prev = cd.install_worker_restart_handler + cd.install_worker_restart_handler = install_worker_restart_handler + try: + worker = self.Worker(app=self.app) + worker.app.IS_OSX = False + worker.install_platform_tweaks(Controller()) + self.assertTrue(restart_worker_handler_installed[0]) + finally: + cd.install_worker_restart_handler = prev + + @disable_stdouts + def test_on_consumer_ready(self): + worker_ready_sent = [False] + + @signals.worker_ready.connect + def on_worker_ready(**kwargs): + worker_ready_sent[0] = True + + self.Worker(app=self.app).on_consumer_ready(object()) + self.assertTrue(worker_ready_sent[0]) + + +class test_funs(WorkerAppCase): + + def test_active_thread_count(self): + self.assertTrue(cd.active_thread_count()) + + @disable_stdouts + def test_set_process_status(self): + try: + __import__('setproctitle') + except ImportError: + raise SkipTest('setproctitle not installed') + worker = Worker(app=self.app, hostname='xyzza') + prev1, sys.argv = sys.argv, ['Arg0'] + try: + st = worker.set_process_status('Running') + self.assertIn('celeryd', st) + self.assertIn('xyzza', st) + self.assertIn('Running', st) + prev2, sys.argv = sys.argv, ['Arg0', 'Arg1'] + try: + st = worker.set_process_status('Running') + self.assertIn('celeryd', st) + self.assertIn('xyzza', st) + self.assertIn('Running', st) + self.assertIn('Arg1', st) + finally: + sys.argv = prev2 + finally: + sys.argv = prev1 + + @disable_stdouts + def test_parse_options(self): + cmd = worker() + cmd.app = self.app + opts, args = cmd.parse_options('worker', ['--concurrency=512', + '--heartbeat-interval=10']) + self.assertEqual(opts.concurrency, 512) + self.assertEqual(opts.heartbeat_interval, 10) + + @disable_stdouts + def test_main(self): + p, cd.Worker = cd.Worker, Worker + s, sys.argv = sys.argv, ['worker', '--discard'] + try: + worker_main(app=self.app) + finally: + cd.Worker = p + sys.argv = s + + +class test_signal_handlers(WorkerAppCase): + + class _Worker(object): + stopped = False + terminated = False + + def stop(self, in_sighandler=False): + self.stopped = True + + def terminate(self, in_sighandler=False): + self.terminated = True + + def psig(self, fun, *args, **kwargs): + handlers = {} + + class Signals(platforms.Signals): + def __setitem__(self, sig, handler): + handlers[sig] = handler + + p, platforms.signals = platforms.signals, Signals() + try: + fun(*args, **kwargs) + return handlers + finally: + platforms.signals = p + + @disable_stdouts + def test_worker_int_handler(self): + worker = self._Worker() + handlers = self.psig(cd.install_worker_int_handler, worker) + next_handlers = {} + state.should_stop = False + state.should_terminate = False + + class Signals(platforms.Signals): + + def __setitem__(self, sig, handler): + next_handlers[sig] = handler + + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 3 + p, platforms.signals = platforms.signals, Signals() + try: + handlers['SIGINT']('SIGINT', object()) + self.assertTrue(state.should_stop) + finally: + platforms.signals = p + state.should_stop = False + + try: + next_handlers['SIGINT']('SIGINT', object()) + self.assertTrue(state.should_terminate) + finally: + state.should_terminate = False + + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 1 + p, platforms.signals = platforms.signals, Signals() + try: + with self.assertRaises(WorkerShutdown): + handlers['SIGINT']('SIGINT', object()) + finally: + platforms.signals = p 
+ + with self.assertRaises(WorkerTerminate): + next_handlers['SIGINT']('SIGINT', object()) + + @disable_stdouts + def test_worker_int_handler_only_stop_MainProcess(self): + try: + import _multiprocessing # noqa + except ImportError: + raise SkipTest('only relevant for multiprocessing') + process = current_process() + name, process.name = process.name, 'OtherProcess' + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 3 + try: + worker = self._Worker() + handlers = self.psig(cd.install_worker_int_handler, worker) + handlers['SIGINT']('SIGINT', object()) + self.assertTrue(state.should_stop) + finally: + process.name = name + state.should_stop = False + + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 1 + try: + worker = self._Worker() + handlers = self.psig(cd.install_worker_int_handler, worker) + with self.assertRaises(WorkerShutdown): + handlers['SIGINT']('SIGINT', object()) + finally: + process.name = name + state.should_stop = False + + @disable_stdouts + def test_install_HUP_not_supported_handler(self): + worker = self._Worker() + handlers = self.psig(cd.install_HUP_not_supported_handler, worker) + handlers['SIGHUP']('SIGHUP', object()) + + @disable_stdouts + def test_worker_term_hard_handler_only_stop_MainProcess(self): + try: + import _multiprocessing # noqa + except ImportError: + raise SkipTest('only relevant for multiprocessing') + process = current_process() + name, process.name = process.name, 'OtherProcess' + try: + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 3 + worker = self._Worker() + handlers = self.psig( + cd.install_worker_term_hard_handler, worker) + try: + handlers['SIGQUIT']('SIGQUIT', object()) + self.assertTrue(state.should_terminate) + finally: + state.should_terminate = False + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 1 + worker = self._Worker() + handlers = self.psig( + cd.install_worker_term_hard_handler, worker) + with self.assertRaises(WorkerTerminate): + handlers['SIGQUIT']('SIGQUIT', object()) + finally: + process.name = name + + @disable_stdouts + def test_worker_term_handler_when_threads(self): + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 3 + worker = self._Worker() + handlers = self.psig(cd.install_worker_term_handler, worker) + try: + handlers['SIGTERM']('SIGTERM', object()) + self.assertTrue(state.should_stop) + finally: + state.should_stop = False + + @disable_stdouts + def test_worker_term_handler_when_single_thread(self): + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 1 + worker = self._Worker() + handlers = self.psig(cd.install_worker_term_handler, worker) + try: + with self.assertRaises(WorkerShutdown): + handlers['SIGTERM']('SIGTERM', object()) + finally: + state.should_stop = False + + @patch('sys.__stderr__') + @skip_if_pypy + @skip_if_jython + def test_worker_cry_handler(self, stderr): + handlers = self.psig(cd.install_cry_handler) + self.assertIsNone(handlers['SIGUSR1']('SIGUSR1', object())) + self.assertTrue(stderr.write.called) + + @disable_stdouts + def test_worker_term_handler_only_stop_MainProcess(self): + try: + import _multiprocessing # noqa + except ImportError: + raise SkipTest('only relevant for multiprocessing') + process = current_process() + name, process.name = process.name, 'OtherProcess' + try: + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 3 + worker = self._Worker() + handlers = 
self.psig(cd.install_worker_term_handler, worker) + handlers['SIGTERM']('SIGTERM', object()) + self.assertTrue(state.should_stop) + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 1 + worker = self._Worker() + handlers = self.psig(cd.install_worker_term_handler, worker) + with self.assertRaises(WorkerShutdown): + handlers['SIGTERM']('SIGTERM', object()) + finally: + process.name = name + state.should_stop = False + + @disable_stdouts + @patch('celery.platforms.close_open_fds') + @patch('atexit.register') + @patch('os.close') + def test_worker_restart_handler(self, _close, register, close_open): + if getattr(os, 'execv', None) is None: + raise SkipTest('platform does not have excv') + argv = [] + + def _execv(*args): + argv.extend(args) + + execv, os.execv = os.execv, _execv + try: + worker = self._Worker() + handlers = self.psig(cd.install_worker_restart_handler, worker) + handlers['SIGHUP']('SIGHUP', object()) + self.assertTrue(state.should_stop) + self.assertTrue(register.called) + callback = register.call_args[0][0] + callback() + self.assertTrue(argv) + finally: + os.execv = execv + state.should_stop = False + + @disable_stdouts + def test_worker_term_hard_handler_when_threaded(self): + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 3 + worker = self._Worker() + handlers = self.psig(cd.install_worker_term_hard_handler, worker) + try: + handlers['SIGQUIT']('SIGQUIT', object()) + self.assertTrue(state.should_terminate) + finally: + state.should_terminate = False + + @disable_stdouts + def test_worker_term_hard_handler_when_single_threaded(self): + with patch('celery.apps.worker.active_thread_count') as c: + c.return_value = 1 + worker = self._Worker() + handlers = self.psig(cd.install_worker_term_hard_handler, worker) + with self.assertRaises(WorkerTerminate): + handlers['SIGQUIT']('SIGQUIT', object()) diff --git a/celery/tests/case.py b/celery/tests/case.py new file mode 100644 index 0000000..0bc0c5d --- /dev/null +++ b/celery/tests/case.py @@ -0,0 +1,863 @@ +from __future__ import absolute_import + +try: + import unittest # noqa + unittest.skip + from unittest.util import safe_repr, unorderable_list_difference +except AttributeError: + import unittest2 as unittest # noqa + from unittest2.util import safe_repr, unorderable_list_difference # noqa + +import importlib +import inspect +import logging +import numbers +import os +import platform +import re +import sys +import threading +import time +import types +import warnings + +from contextlib import contextmanager +from copy import deepcopy +from datetime import datetime, timedelta +from functools import partial, wraps +from types import ModuleType + +try: + from unittest import mock +except ImportError: + import mock # noqa +from nose import SkipTest +from kombu import Queue +from kombu.log import NullHandler +from kombu.utils import nested, symbol_by_name + +from celery import Celery +from celery.app import current_app +from celery.backends.cache import CacheBackend, DummyClient +from celery.exceptions import CDeprecationWarning, CPendingDeprecationWarning +from celery.five import ( + WhateverIO, builtins, items, reraise, + string_t, values, open_fqdn, +) +from celery.utils.functional import noop +from celery.utils.imports import qualname + +__all__ = [ + 'Case', 'AppCase', 'Mock', 'MagicMock', 'ANY', + 'patch', 'call', 'sentinel', 'skip_unless_module', + 'wrap_logger', 'with_environ', 'sleepdeprived', + 'skip_if_environ', 'todo', 'skip', 'skip_if', + 'skip_unless', 
'mask_modules', 'override_stdouts', 'mock_module', + 'replace_module_value', 'sys_platform', 'reset_modules', + 'patch_modules', 'mock_context', 'mock_open', 'patch_many', + 'assert_signal_called', 'skip_if_pypy', + 'skip_if_jython', 'body_from_sig', 'restore_logging', +] +patch = mock.patch +call = mock.call +sentinel = mock.sentinel +MagicMock = mock.MagicMock +ANY = mock.ANY + +PY3 = sys.version_info[0] == 3 + +CASE_REDEFINES_SETUP = """\ +{name} (subclass of AppCase) redefines private "setUp", should be: "setup"\ +""" +CASE_REDEFINES_TEARDOWN = """\ +{name} (subclass of AppCase) redefines private "tearDown", \ +should be: "teardown"\ +""" +CASE_LOG_REDIRECT_EFFECT = """\ +Test {0} did not disable LoggingProxy for {1}\ +""" +CASE_LOG_LEVEL_EFFECT = """\ +Test {0} Modified the level of the root logger\ +""" +CASE_LOG_HANDLER_EFFECT = """\ +Test {0} Modified handlers for the root logger\ +""" + +CELERY_TEST_CONFIG = { + #: Don't want log output when running suite. + 'CELERYD_HIJACK_ROOT_LOGGER': False, + 'CELERY_SEND_TASK_ERROR_EMAILS': False, + 'CELERY_DEFAULT_QUEUE': 'testcelery', + 'CELERY_DEFAULT_EXCHANGE': 'testcelery', + 'CELERY_DEFAULT_ROUTING_KEY': 'testcelery', + 'CELERY_QUEUES': ( + Queue('testcelery', routing_key='testcelery'), + ), + 'CELERY_ENABLE_UTC': True, + 'CELERY_TIMEZONE': 'UTC', + 'CELERYD_LOG_COLOR': False, + + # Mongo results tests (only executed if installed and running) + 'CELERY_MONGODB_BACKEND_SETTINGS': { + 'host': os.environ.get('MONGO_HOST') or 'localhost', + 'port': os.environ.get('MONGO_PORT') or 27017, + 'database': os.environ.get('MONGO_DB') or 'celery_unittests', + 'taskmeta_collection': (os.environ.get('MONGO_TASKMETA_COLLECTION') + or 'taskmeta_collection'), + 'user': os.environ.get('MONGO_USER'), + 'password': os.environ.get('MONGO_PASSWORD'), + } +} + + +class Trap(object): + + def __getattr__(self, name): + raise RuntimeError('Test depends on current_app') + + +class UnitLogging(symbol_by_name(Celery.log_cls)): + + def __init__(self, *args, **kwargs): + super(UnitLogging, self).__init__(*args, **kwargs) + self.already_setup = True + + +def UnitApp(name=None, broker=None, backend=None, + set_as_current=False, log=UnitLogging, **kwargs): + + app = Celery(name or 'celery.tests', + broker=broker or 'memory://', + backend=backend or 'cache+memory://', + set_as_current=set_as_current, + log=log, + **kwargs) + app.add_defaults(deepcopy(CELERY_TEST_CONFIG)) + return app + + +class Mock(mock.Mock): + + def __init__(self, *args, **kwargs): + attrs = kwargs.pop('attrs', None) or {} + super(Mock, self).__init__(*args, **kwargs) + for attr_name, attr_value in items(attrs): + setattr(self, attr_name, attr_value) + + +class _ContextMock(Mock): + """Dummy class implementing __enter__ and __exit__ + as the with statement requires these to be implemented + in the class, not just the instance.""" + + def __enter__(self): + pass + + def __exit__(self, *exc_info): + pass + + +def ContextMock(*args, **kwargs): + obj = _ContextMock(*args, **kwargs) + obj.attach_mock(_ContextMock(), '__enter__') + obj.attach_mock(_ContextMock(), '__exit__') + obj.__enter__.return_value = obj + # if __exit__ return a value the exception is ignored, + # so it must return None here. 
+ obj.__exit__.return_value = None + return obj + + +def _bind(f, o): + @wraps(f) + def bound_meth(*fargs, **fkwargs): + return f(o, *fargs, **fkwargs) + return bound_meth + + +if PY3: # pragma: no cover + def _get_class_fun(meth): + return meth +else: + def _get_class_fun(meth): + return meth.__func__ + + +class MockCallbacks(object): + + def __new__(cls, *args, **kwargs): + r = Mock(name=cls.__name__) + _get_class_fun(cls.__init__)(r, *args, **kwargs) + for key, value in items(vars(cls)): + if key not in ('__dict__', '__weakref__', '__new__', '__init__'): + if inspect.ismethod(value) or inspect.isfunction(value): + r.__getattr__(key).side_effect = _bind(value, r) + else: + r.__setattr__(key, value) + return r + + +def skip_unless_module(module): + + def _inner(fun): + + @wraps(fun) + def __inner(*args, **kwargs): + try: + importlib.import_module(module) + except ImportError: + raise SkipTest('Does not have %s' % (module, )) + + return fun(*args, **kwargs) + + return __inner + return _inner + + +# -- adds assertWarns from recent unittest2, not in Python 2.7. + +class _AssertRaisesBaseContext(object): + + def __init__(self, expected, test_case, callable_obj=None, + expected_regex=None): + self.expected = expected + self.failureException = test_case.failureException + self.obj_name = None + if isinstance(expected_regex, string_t): + expected_regex = re.compile(expected_regex) + self.expected_regex = expected_regex + + +def _is_magic_module(m): + # some libraries create custom module types that are lazily + # lodaded, e.g. Django installs some modules in sys.modules that + # will load _tkinter and other shit when touched. + + # pyflakes refuses to accept 'noqa' for this isinstance. + cls, modtype = m.__class__, types.ModuleType + return (cls is not modtype and ( + '__getattr__' in vars(m.__class__) or + '__getattribute__' in vars(m.__class__))) + + +class _AssertWarnsContext(_AssertRaisesBaseContext): + """A context manager used to implement TestCase.assertWarns* methods.""" + + def __enter__(self): + # The __warningregistry__'s need to be in a pristine state for tests + # to work properly. + warnings.resetwarnings() + for v in list(values(sys.modules)): + # do not evaluate Django moved modules and other lazily + # initialized modules. 
+ if v and not _is_magic_module(v): + # use raw __getattribute__ to protect even better from + # lazily loaded modules + try: + object.__getattribute__(v, '__warningregistry__') + except AttributeError: + pass + else: + object.__setattr__(v, '__warningregistry__', {}) + self.warnings_manager = warnings.catch_warnings(record=True) + self.warnings = self.warnings_manager.__enter__() + warnings.simplefilter('always', self.expected) + return self + + def __exit__(self, exc_type, exc_value, tb): + self.warnings_manager.__exit__(exc_type, exc_value, tb) + if exc_type is not None: + # let unexpected exceptions pass through + return + try: + exc_name = self.expected.__name__ + except AttributeError: + exc_name = str(self.expected) + first_matching = None + for m in self.warnings: + w = m.message + if not isinstance(w, self.expected): + continue + if first_matching is None: + first_matching = w + if (self.expected_regex is not None and + not self.expected_regex.search(str(w))): + continue + # store warning for later retrieval + self.warning = w + self.filename = m.filename + self.lineno = m.lineno + return + # Now we simply try to choose a helpful failure message + if first_matching is not None: + raise self.failureException( + '%r does not match %r' % ( + self.expected_regex.pattern, str(first_matching))) + if self.obj_name: + raise self.failureException( + '%s not triggered by %s' % (exc_name, self.obj_name)) + else: + raise self.failureException('%s not triggered' % exc_name) + + +class Case(unittest.TestCase): + + def assertWarns(self, expected_warning): + return _AssertWarnsContext(expected_warning, self, None) + + def assertWarnsRegex(self, expected_warning, expected_regex): + return _AssertWarnsContext(expected_warning, self, + None, expected_regex) + + @contextmanager + def assertDeprecated(self): + with self.assertWarnsRegex(CDeprecationWarning, + r'scheduled for removal'): + yield + + @contextmanager + def assertPendingDeprecation(self): + with self.assertWarnsRegex(CPendingDeprecationWarning, + r'scheduled for deprecation'): + yield + + def assertDictContainsSubset(self, expected, actual, msg=None): + missing, mismatched = [], [] + + for key, value in items(expected): + if key not in actual: + missing.append(key) + elif value != actual[key]: + mismatched.append('%s, expected: %s, actual: %s' % ( + safe_repr(key), safe_repr(value), + safe_repr(actual[key]))) + + if not (missing or mismatched): + return + + standard_msg = '' + if missing: + standard_msg = 'Missing: %s' % ','.join(map(safe_repr, missing)) + + if mismatched: + if standard_msg: + standard_msg += '; ' + standard_msg += 'Mismatched values: %s' % ( + ','.join(mismatched)) + + self.fail(self._formatMessage(msg, standard_msg)) + + def assertItemsEqual(self, expected_seq, actual_seq, msg=None): + missing = unexpected = None + try: + expected = sorted(expected_seq) + actual = sorted(actual_seq) + except TypeError: + # Unsortable items (example: set(), complex(), ...) 
+ expected = list(expected_seq) + actual = list(actual_seq) + missing, unexpected = unorderable_list_difference( + expected, actual) + else: + return self.assertSequenceEqual(expected, actual, msg=msg) + + errors = [] + if missing: + errors.append( + 'Expected, but missing:\n %s' % (safe_repr(missing), ) + ) + if unexpected: + errors.append( + 'Unexpected, but present:\n %s' % (safe_repr(unexpected), ) + ) + if errors: + standardMsg = '\n'.join(errors) + self.fail(self._formatMessage(msg, standardMsg)) + + +def depends_on_current_app(fun): + if inspect.isclass(fun): + fun.contained = False + else: + @wraps(fun) + def __inner(self, *args, **kwargs): + self.app.set_current() + return fun(self, *args, **kwargs) + return __inner + + +class AppCase(Case): + contained = True + + def __init__(self, *args, **kwargs): + super(AppCase, self).__init__(*args, **kwargs) + if self.__class__.__dict__.get('setUp'): + raise RuntimeError( + CASE_REDEFINES_SETUP.format(name=qualname(self)), + ) + if self.__class__.__dict__.get('tearDown'): + raise RuntimeError( + CASE_REDEFINES_TEARDOWN.format(name=qualname(self)), + ) + + def Celery(self, *args, **kwargs): + return UnitApp(*args, **kwargs) + + def setUp(self): + self._threads_at_setup = list(threading.enumerate()) + from celery import _state + from celery import result + result.task_join_will_block = \ + _state.task_join_will_block = lambda: False + self._current_app = current_app() + self._default_app = _state.default_app + trap = Trap() + self._prev_tls = _state._tls + _state.set_default_app(trap) + + class NonTLS(object): + current_app = trap + _state._tls = NonTLS() + + self.app = self.Celery(set_as_current=False) + if not self.contained: + self.app.set_current() + root = logging.getLogger() + self.__rootlevel = root.level + self.__roothandlers = root.handlers + _state._set_task_join_will_block(False) + try: + self.setup() + except: + self._teardown_app() + raise + + def _teardown_app(self): + from celery.utils.log import LoggingProxy + assert sys.stdout + assert sys.stderr + assert sys.__stdout__ + assert sys.__stderr__ + this = self._get_test_name() + if isinstance(sys.stdout, LoggingProxy) or \ + isinstance(sys.__stdout__, LoggingProxy): + raise RuntimeError(CASE_LOG_REDIRECT_EFFECT.format(this, 'stdout')) + if isinstance(sys.stderr, LoggingProxy) or \ + isinstance(sys.__stderr__, LoggingProxy): + raise RuntimeError(CASE_LOG_REDIRECT_EFFECT.format(this, 'stderr')) + backend = self.app.__dict__.get('backend') + if backend is not None: + if isinstance(backend, CacheBackend): + if isinstance(backend.client, DummyClient): + backend.client.cache.clear() + backend._cache.clear() + from celery import _state + _state._set_task_join_will_block(False) + + _state.set_default_app(self._default_app) + _state._tls = self._prev_tls + _state._tls.current_app = self._current_app + if self.app is not self._current_app: + self.app.close() + self.app = None + self.assertEqual( + self._threads_at_setup, list(threading.enumerate()), + ) + + def _get_test_name(self): + return '.'.join([self.__class__.__name__, self._testMethodName]) + + def tearDown(self): + try: + self.teardown() + finally: + self._teardown_app() + self.assert_no_logging_side_effect() + + def assert_no_logging_side_effect(self): + this = self._get_test_name() + root = logging.getLogger() + if root.level != self.__rootlevel: + raise RuntimeError(CASE_LOG_LEVEL_EFFECT.format(this)) + if root.handlers != self.__roothandlers: + raise RuntimeError(CASE_LOG_HANDLER_EFFECT.format(this)) + + def setup(self): + 
pass + + def teardown(self): + pass + + +def get_handlers(logger): + return [h for h in logger.handlers if not isinstance(h, NullHandler)] + + +@contextmanager +def wrap_logger(logger, loglevel=logging.ERROR): + old_handlers = get_handlers(logger) + sio = WhateverIO() + siohandler = logging.StreamHandler(sio) + logger.handlers = [siohandler] + + try: + yield sio + finally: + logger.handlers = old_handlers + + +def with_environ(env_name, env_value): + + def _envpatched(fun): + + @wraps(fun) + def _patch_environ(*args, **kwargs): + prev_val = os.environ.get(env_name) + os.environ[env_name] = env_value + try: + return fun(*args, **kwargs) + finally: + os.environ[env_name] = prev_val or '' + + return _patch_environ + return _envpatched + + +def sleepdeprived(module=time): + + def _sleepdeprived(fun): + + @wraps(fun) + def __sleepdeprived(*args, **kwargs): + old_sleep = module.sleep + module.sleep = noop + try: + return fun(*args, **kwargs) + finally: + module.sleep = old_sleep + + return __sleepdeprived + + return _sleepdeprived + + +def skip_if_environ(env_var_name): + + def _wrap_test(fun): + + @wraps(fun) + def _skips_if_environ(*args, **kwargs): + if os.environ.get(env_var_name): + raise SkipTest('SKIP %s: %s set\n' % ( + fun.__name__, env_var_name)) + return fun(*args, **kwargs) + + return _skips_if_environ + + return _wrap_test + + +def _skip_test(reason, sign): + + def _wrap_test(fun): + + @wraps(fun) + def _skipped_test(*args, **kwargs): + raise SkipTest('%s: %s' % (sign, reason)) + + return _skipped_test + return _wrap_test + + +def todo(reason): + """TODO test decorator.""" + return _skip_test(reason, 'TODO') + + +def skip(reason): + """Skip test decorator.""" + return _skip_test(reason, 'SKIP') + + +def skip_if(predicate, reason): + """Skip test if predicate is :const:`True`.""" + + def _inner(fun): + return predicate and skip(reason)(fun) or fun + + return _inner + + +def skip_unless(predicate, reason): + """Skip test if predicate is :const:`False`.""" + return skip_if(not predicate, reason) + + +# Taken from +# http://bitbucket.org/runeh/snippets/src/tip/missing_modules.py +@contextmanager +def mask_modules(*modnames): + """Ban some modules from being importable inside the context + + For example: + + >>> with mask_modules('sys'): + ... try: + ... import sys + ... except ImportError: + ... 
print('sys not found')
+    sys not found
+
+    >>> import sys  # noqa
+    >>> sys.version_info
+    (2, 5, 2, 'final', 0)
+
+    """
+
+    realimport = builtins.__import__
+
+    def myimp(name, *args, **kwargs):
+        if name in modnames:
+            raise ImportError('No module named %s' % name)
+        else:
+            return realimport(name, *args, **kwargs)
+
+    builtins.__import__ = myimp
+    try:
+        yield True
+    finally:
+        builtins.__import__ = realimport
+
+
+@contextmanager
+def override_stdouts():
+    """Override `sys.stdout` and `sys.stderr` with `WhateverIO`."""
+    prev_out, prev_err = sys.stdout, sys.stderr
+    mystdout, mystderr = WhateverIO(), WhateverIO()
+    sys.stdout = sys.__stdout__ = mystdout
+    sys.stderr = sys.__stderr__ = mystderr
+
+    try:
+        yield mystdout, mystderr
+    finally:
+        sys.stdout = sys.__stdout__ = prev_out
+        sys.stderr = sys.__stderr__ = prev_err
+
+
+def _old_patch(module, name, mocked):
+    module = importlib.import_module(module)
+
+    def _patch(fun):
+
+        @wraps(fun)
+        def __patched(*args, **kwargs):
+            prev = getattr(module, name)
+            setattr(module, name, mocked)
+            try:
+                return fun(*args, **kwargs)
+            finally:
+                setattr(module, name, prev)
+        return __patched
+    return _patch
+
+
+@contextmanager
+def replace_module_value(module, name, value=None):
+    has_prev = hasattr(module, name)
+    prev = getattr(module, name, None)
+    if value:
+        setattr(module, name, value)
+    else:
+        try:
+            delattr(module, name)
+        except AttributeError:
+            pass
+    try:
+        yield
+    finally:
+        if prev is not None:
+            # restore the value on the module it was taken from
+            setattr(module, name, prev)
+        if not has_prev:
+            try:
+                delattr(module, name)
+            except AttributeError:
+                pass
+pypy_version = partial(
+    replace_module_value, sys, 'pypy_version_info',
+)
+platform_pyimp = partial(
+    replace_module_value, platform, 'python_implementation',
+)
+
+
+@contextmanager
+def sys_platform(value):
+    prev, sys.platform = sys.platform, value
+    try:
+        yield
+    finally:
+        sys.platform = prev
+
+
+@contextmanager
+def reset_modules(*modules):
+    prev = dict((k, sys.modules.pop(k)) for k in modules if k in sys.modules)
+    try:
+        yield
+    finally:
+        sys.modules.update(prev)
+
+
+@contextmanager
+def patch_modules(*modules):
+    prev = {}
+    for mod in modules:
+        prev[mod] = sys.modules.get(mod)
+        sys.modules[mod] = ModuleType(mod)
+    try:
+        yield
+    finally:
+        for name, mod in items(prev):
+            if mod is None:
+                sys.modules.pop(name, None)
+            else:
+                sys.modules[name] = mod
+
+
+@contextmanager
+def mock_module(*names):
+    prev = {}
+
+    class MockModule(ModuleType):
+
+        def __getattr__(self, attr):
+            setattr(self, attr, Mock())
+            return ModuleType.__getattribute__(self, attr)
+
+    mods = []
+    for name in names:
+        try:
+            prev[name] = sys.modules[name]
+        except KeyError:
+            pass
+        mod = sys.modules[name] = MockModule(name)
+        mods.append(mod)
+    try:
+        yield mods
+    finally:
+        for name in names:
+            try:
+                sys.modules[name] = prev[name]
+            except KeyError:
+                try:
+                    del(sys.modules[name])
+                except KeyError:
+                    pass
+
+
+@contextmanager
+def mock_context(mock, typ=Mock):
+    context = mock.return_value = Mock()
+    context.__enter__ = typ()
+    context.__exit__ = typ()
+
+    def on_exit(*x):
+        if x[0]:
+            reraise(x[0], x[1], x[2])
+    context.__exit__.side_effect = on_exit
+    context.__enter__.return_value = context
+    try:
+        yield context
+    finally:
+        context.reset()
+
+
+@contextmanager
+def mock_open(typ=WhateverIO, side_effect=None):
+    with patch(open_fqdn) as open_:
+        with mock_context(open_) as context:
+            if side_effect is not None:
+                context.__enter__.side_effect = side_effect
+            val = context.__enter__.return_value = typ()
+            val.__exit__ = Mock()
+            yield val
+
+
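+# Usage example (illustrative sketch only; ``test_helpers_example`` and its
+# methods are hypothetical, not part of this module): ``mask_modules`` makes
+# an import fail, ``sys_platform`` temporarily rewrites ``sys.platform``, and
+# ``override_stdouts`` captures anything printed during the block:
+#
+#     class test_helpers_example(Case):
+#
+#         def test_import_fallback(self):
+#             with mask_modules('curses'):
+#                 with self.assertRaises(ImportError):
+#                     import curses  # noqa
+#
+#         def test_captures_output(self):
+#             with sys_platform('win32'):
+#                 with override_stdouts() as (stdout, _):
+#                     print('running on %s' % (sys.platform, ))
+#                     self.assertIn('win32', stdout.getvalue())
+
+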
+def patch_many(*targets): + return nested(*[patch(target) for target in targets]) + + +@contextmanager +def assert_signal_called(signal, **expected): + handler = Mock() + call_handler = partial(handler) + signal.connect(call_handler) + try: + yield handler + finally: + signal.disconnect(call_handler) + handler.assert_called_with(signal=signal, **expected) + + +def skip_if_pypy(fun): + + @wraps(fun) + def _inner(*args, **kwargs): + if getattr(sys, 'pypy_version_info', None): + raise SkipTest('does not work on PyPy') + return fun(*args, **kwargs) + return _inner + + +def skip_if_jython(fun): + + @wraps(fun) + def _inner(*args, **kwargs): + if sys.platform.startswith('java'): + raise SkipTest('does not work on Jython') + return fun(*args, **kwargs) + return _inner + + +def body_from_sig(app, sig, utc=True): + sig.freeze() + callbacks = sig.options.pop('link', None) + errbacks = sig.options.pop('link_error', None) + countdown = sig.options.pop('countdown', None) + if countdown: + eta = app.now() + timedelta(seconds=countdown) + else: + eta = sig.options.pop('eta', None) + if eta and isinstance(eta, datetime): + eta = eta.isoformat() + expires = sig.options.pop('expires', None) + if expires and isinstance(expires, numbers.Real): + expires = app.now() + timedelta(seconds=expires) + if expires and isinstance(expires, datetime): + expires = expires.isoformat() + return { + 'task': sig.task, + 'id': sig.id, + 'args': sig.args, + 'kwargs': sig.kwargs, + 'callbacks': [dict(s) for s in callbacks] if callbacks else None, + 'errbacks': [dict(s) for s in errbacks] if errbacks else None, + 'eta': eta, + 'utc': utc, + 'expires': expires, + } + + +@contextmanager +def restore_logging(): + outs = sys.stdout, sys.stderr, sys.__stdout__, sys.__stderr__ + root = logging.getLogger() + level = root.level + handlers = root.handlers + + try: + yield + finally: + sys.stdout, sys.stderr, sys.__stdout__, sys.__stderr__ = outs + root.level = level + root.handlers[:] = handlers diff --git a/celery/tests/compat_modules/__init__.py b/celery/tests/compat_modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/tests/compat_modules/test_compat.py b/celery/tests/compat_modules/test_compat.py new file mode 100644 index 0000000..d285188 --- /dev/null +++ b/celery/tests/compat_modules/test_compat.py @@ -0,0 +1,82 @@ +from __future__ import absolute_import + +from datetime import timedelta + +import sys +sys.modules.pop('celery.task', None) + +from celery.schedules import schedule +from celery.task import ( + periodic_task, + PeriodicTask +) +from celery.utils.timeutils import timedelta_seconds + +from celery.tests.case import AppCase, depends_on_current_app + + +class test_Task(AppCase): + + def test_base_task_inherits_magic_kwargs_from_app(self): + from celery.task import Task as OldTask + + class timkX(OldTask): + abstract = True + + with self.Celery(set_as_current=False, + accept_magic_kwargs=True) as app: + timkX.bind(app) + # see #918 + self.assertFalse(timkX.accept_magic_kwargs) + + from celery import Task as NewTask + + class timkY(NewTask): + abstract = True + + timkY.bind(app) + self.assertFalse(timkY.accept_magic_kwargs) + + +@depends_on_current_app +class test_periodic_tasks(AppCase): + + def setup(self): + @periodic_task(app=self.app, shared=False, + run_every=schedule(timedelta(hours=1), app=self.app)) + def my_periodic(): + pass + self.my_periodic = my_periodic + + def now(self): + return self.app.now() + + def test_must_have_run_every(self): + with 
self.assertRaises(NotImplementedError): + type('Foo', (PeriodicTask, ), {'__module__': __name__}) + + def test_remaining_estimate(self): + s = self.my_periodic.run_every + self.assertIsInstance( + s.remaining_estimate(s.maybe_make_aware(self.now())), + timedelta) + + def test_is_due_not_due(self): + due, remaining = self.my_periodic.run_every.is_due(self.now()) + self.assertFalse(due) + # This assertion may fail if executed in the + # first minute of an hour, thus 59 instead of 60 + self.assertGreater(remaining, 59) + + def test_is_due(self): + p = self.my_periodic + due, remaining = p.run_every.is_due( + self.now() - p.run_every.run_every, + ) + self.assertTrue(due) + self.assertEqual(remaining, + timedelta_seconds(p.run_every.run_every)) + + def test_schedule_repr(self): + p = self.my_periodic + self.assertTrue(repr(p.run_every)) diff --git a/celery/tests/compat_modules/test_compat_utils.py b/celery/tests/compat_modules/test_compat_utils.py new file mode 100644 index 0000000..b041a0b --- /dev/null +++ b/celery/tests/compat_modules/test_compat_utils.py @@ -0,0 +1,50 @@ +from __future__ import absolute_import + +import celery + +from celery.app.task import Task as ModernTask +from celery.task.base import Task as CompatTask + +from celery.tests.case import AppCase, depends_on_current_app + + +@depends_on_current_app +class test_MagicModule(AppCase): + + def test_class_property_set_without_type(self): + self.assertTrue(ModernTask.__dict__['app'].__get__(CompatTask())) + + def test_class_property_set_on_class(self): + self.assertIs(ModernTask.__dict__['app'].__set__(None, None), + ModernTask.__dict__['app']) + + def test_class_property_set(self): + + class X(CompatTask): + pass + ModernTask.__dict__['app'].__set__(X(), self.app) + self.assertIs(X.app, self.app) + + def test_dir(self): + self.assertTrue(dir(celery.messaging)) + + def test_direct(self): + self.assertTrue(celery.task) + + def test_app_attrs(self): + self.assertEqual(celery.task.control.broadcast, + celery.current_app.control.broadcast) + + def test_decorators_task(self): + @celery.decorators.task + def _test_decorators_task(): + pass + + self.assertTrue(_test_decorators_task.accept_magic_kwargs) + + def test_decorators_periodic_task(self): + @celery.decorators.periodic_task(run_every=3600) + def _test_decorators_ptask(): + pass + + self.assertTrue(_test_decorators_ptask.accept_magic_kwargs) diff --git a/celery/tests/compat_modules/test_decorators.py b/celery/tests/compat_modules/test_decorators.py new file mode 100644 index 0000000..9f5dff9 --- /dev/null +++ b/celery/tests/compat_modules/test_decorators.py @@ -0,0 +1,39 @@ +from __future__ import absolute_import + +import warnings + +from celery.task import base + +from celery.tests.case import AppCase, depends_on_current_app + + +def add(x, y): + return x + y + + +@depends_on_current_app +class test_decorators(AppCase): + + def test_task_alias(self): + from celery import task + self.assertTrue(task.__file__) + self.assertTrue(task(add)) + + def setup(self): + with warnings.catch_warnings(record=True): + from celery import decorators + self.decorators = decorators + + def assertCompatDecorator(self, decorator, type, **opts): + task = decorator(**opts)(add) + self.assertEqual(task(8, 8), 16) + self.assertTrue(task.accept_magic_kwargs) + self.assertIsInstance(task, type) + + def test_task(self): + self.assertCompatDecorator(self.decorators.task, base.BaseTask) + + def test_periodic_task(self): + self.assertCompatDecorator(self.decorators.periodic_task, + base.BaseTask, + 
run_every=1) diff --git a/celery/tests/compat_modules/test_http.py b/celery/tests/compat_modules/test_http.py new file mode 100644 index 0000000..08505f8 --- /dev/null +++ b/celery/tests/compat_modules/test_http.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals + +from contextlib import contextmanager +from functools import wraps +try: + from urllib import addinfourl +except ImportError: # py3k + from urllib.request import addinfourl # noqa + +from anyjson import dumps +from kombu.utils.encoding import from_utf8 + +from celery.five import WhateverIO, items +from celery.task import http +from celery.tests.case import AppCase, Case + + +@contextmanager +def mock_urlopen(response_method): + + urlopen = http.urlopen + + @wraps(urlopen) + def _mocked(url, *args, **kwargs): + response_data, headers = response_method(url) + return addinfourl(WhateverIO(response_data), headers, url) + + http.urlopen = _mocked + + try: + yield True + finally: + http.urlopen = urlopen + + +def _response(res): + return lambda r: (res, []) + + +def success_response(value): + return _response(dumps({'status': 'success', 'retval': value})) + + +def fail_response(reason): + return _response(dumps({'status': 'failure', 'reason': reason})) + + +def unknown_response(): + return _response(dumps({'status': 'u.u.u.u', 'retval': True})) + + +class test_encodings(Case): + + def test_utf8dict(self): + uk = 'foobar' + d = {'følelser ær langé': 'ærbadægzaååÆØÅ', + from_utf8(uk): from_utf8('xuzzybaz')} + + for key, value in items(http.utf8dict(items(d))): + self.assertIsInstance(key, str) + self.assertIsInstance(value, str) + + +class test_MutableURL(Case): + + def test_url_query(self): + url = http.MutableURL('http://example.com?x=10&y=20&z=Foo') + self.assertDictContainsSubset({'x': '10', + 'y': '20', + 'z': 'Foo'}, url.query) + url.query['name'] = 'George' + url = http.MutableURL(str(url)) + self.assertDictContainsSubset({'x': '10', + 'y': '20', + 'z': 'Foo', + 'name': 'George'}, url.query) + + def test_url_keeps_everything(self): + url = 'https://e.com:808/foo/bar#zeta?x=10&y=20' + url = http.MutableURL(url) + + self.assertEqual( + str(url).split('?')[0], + 'https://e.com:808/foo/bar#zeta', + ) + + def test___repr__(self): + url = http.MutableURL('http://e.com/foo/bar') + self.assertTrue(repr(url).startswith(' 50: + return True + raise err + finally: + called[0] += 1 + sock.return_value.bind.side_effect = effect + Rdb(out=out) diff --git a/celery/tests/events/__init__.py b/celery/tests/events/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/tests/events/test_cursesmon.py b/celery/tests/events/test_cursesmon.py new file mode 100644 index 0000000..c8e6151 --- /dev/null +++ b/celery/tests/events/test_cursesmon.py @@ -0,0 +1,70 @@ +from __future__ import absolute_import + +from celery.tests.case import AppCase, SkipTest + + +class MockWindow(object): + + def getmaxyx(self): + return self.y, self.x + + +class test_CursesDisplay(AppCase): + + def setup(self): + try: + import curses # noqa + except ImportError: + raise SkipTest('curses monitor requires curses') + + from celery.events import cursesmon + self.monitor = cursesmon.CursesMonitor(object(), app=self.app) + self.win = MockWindow() + self.monitor.win = self.win + + def test_format_row_with_default_widths(self): + self.win.x, self.win.y = 91, 24 + row = self.monitor.format_row( + '783da208-77d0-40ca-b3d6-37dd6dbb55d3', + 'task.task.task.task.task.task.task.task.task.tas', + 
'workerworkerworkerworkerworkerworkerworkerworker', + '21:13:20', + 'SUCCESS') + self.assertEqual('783da208-77d0-40ca-b3d6-37dd6dbb55d3 ' + 'workerworker... task.task.[.]tas 21:13:20 SUCCESS ', + row) + + def test_format_row_with_truncated_uuid(self): + self.win.x, self.win.y = 80, 24 + row = self.monitor.format_row( + '783da208-77d0-40ca-b3d6-37dd6dbb55d3', + 'task.task.task.task.task.task.task.task.task.tas', + 'workerworkerworkerworkerworkerworkerworkerworker', + '21:13:20', + 'SUCCESS') + self.assertEqual('783da208-77d0-40ca-b3d... workerworker... ' + 'task.task.[.]tas 21:13:20 SUCCESS ', + row) + + def test_format_title_row(self): + self.win.x, self.win.y = 80, 24 + row = self.monitor.format_row('UUID', 'TASK', + 'WORKER', 'TIME', 'STATE') + self.assertEqual('UUID WORKER ' + 'TASK TIME STATE ', + row) + + def test_format_row_for_wide_screen_with_short_uuid(self): + self.win.x, self.win.y = 140, 24 + row = self.monitor.format_row( + '783da208-77d0-40ca-b3d6-37dd6dbb55d3', + 'task.task.task.task.task.task.task.task.task.tas', + 'workerworkerworkerworkerworkerworkerworkerworker', + '21:13:20', + 'SUCCESS') + self.assertEqual(136, len(row)) + self.assertEqual('783da208-77d0-40ca-b3d6-37dd6dbb55d3 ' + 'workerworkerworkerworkerworkerworker... ' + 'task.task.task.task.task.task.task.[.]tas ' + '21:13:20 SUCCESS ', + row) diff --git a/celery/tests/events/test_events.py b/celery/tests/events/test_events.py new file mode 100644 index 0000000..791f416 --- /dev/null +++ b/celery/tests/events/test_events.py @@ -0,0 +1,260 @@ +from __future__ import absolute_import + +import socket + +from celery.events import Event +from celery.tests.case import AppCase, Mock + + +class MockProducer(object): + raise_on_publish = False + + def __init__(self, *args, **kwargs): + self.sent = [] + + def publish(self, msg, *args, **kwargs): + if self.raise_on_publish: + raise KeyError() + self.sent.append(msg) + + def close(self): + pass + + def has_event(self, kind): + for event in self.sent: + if event['type'] == kind: + return event + return False + + +class test_Event(AppCase): + + def test_constructor(self): + event = Event('world war II') + self.assertEqual(event['type'], 'world war II') + self.assertTrue(event['timestamp']) + + +class test_EventDispatcher(AppCase): + + def test_redis_uses_fanout_exchange(self): + self.app.connection = Mock() + conn = self.app.connection.return_value = Mock() + conn.transport.driver_type = 'redis' + + dispatcher = self.app.events.Dispatcher(conn, enabled=False) + self.assertEqual(dispatcher.exchange.type, 'fanout') + + def test_others_use_topic_exchange(self): + self.app.connection = Mock() + conn = self.app.connection.return_value = Mock() + conn.transport.driver_type = 'amqp' + dispatcher = self.app.events.Dispatcher(conn, enabled=False) + self.assertEqual(dispatcher.exchange.type, 'topic') + + def test_takes_channel_connection(self): + x = self.app.events.Dispatcher(channel=Mock()) + self.assertIs(x.connection, x.channel.connection.client) + + def test_sql_transports_disabled(self): + conn = Mock() + conn.transport.driver_type = 'sql' + x = self.app.events.Dispatcher(connection=conn) + self.assertFalse(x.enabled) + + def test_send(self): + producer = MockProducer() + producer.connection = self.app.connection() + connection = Mock() + connection.transport.driver_type = 'amqp' + eventer = self.app.events.Dispatcher(connection, enabled=False, + buffer_while_offline=False) + eventer.producer = producer + eventer.enabled = True + eventer.send('World War II', ended=True) + 
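+        # while the dispatcher is enabled, events must reach the mock producer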
self.assertTrue(producer.has_event('World War II')) + eventer.enabled = False + eventer.send('World War III') + self.assertFalse(producer.has_event('World War III')) + + evs = ('Event 1', 'Event 2', 'Event 3') + eventer.enabled = True + eventer.producer.raise_on_publish = True + eventer.buffer_while_offline = False + with self.assertRaises(KeyError): + eventer.send('Event X') + eventer.buffer_while_offline = True + for ev in evs: + eventer.send(ev) + eventer.producer.raise_on_publish = False + eventer.flush() + for ev in evs: + self.assertTrue(producer.has_event(ev)) + + buf = eventer._outbound_buffer = Mock() + buf.popleft.side_effect = IndexError() + eventer.flush() + + def test_enter_exit(self): + with self.app.connection() as conn: + d = self.app.events.Dispatcher(conn) + d.close = Mock() + with d as _d: + self.assertTrue(_d) + d.close.assert_called_with() + + def test_enable_disable_callbacks(self): + on_enable = Mock() + on_disable = Mock() + with self.app.connection() as conn: + with self.app.events.Dispatcher(conn, enabled=False) as d: + d.on_enabled.add(on_enable) + d.on_disabled.add(on_disable) + d.enable() + on_enable.assert_called_with() + d.disable() + on_disable.assert_called_with() + + def test_enabled_disable(self): + connection = self.app.connection() + channel = connection.channel() + try: + dispatcher = self.app.events.Dispatcher(connection, + enabled=True) + dispatcher2 = self.app.events.Dispatcher(connection, + enabled=True, + channel=channel) + self.assertTrue(dispatcher.enabled) + self.assertTrue(dispatcher.producer.channel) + self.assertEqual(dispatcher.producer.serializer, + self.app.conf.CELERY_EVENT_SERIALIZER) + + created_channel = dispatcher.producer.channel + dispatcher.disable() + dispatcher.disable() # Disable with no active producer + dispatcher2.disable() + self.assertFalse(dispatcher.enabled) + self.assertIsNone(dispatcher.producer) + self.assertFalse(dispatcher2.channel.closed, + 'does not close manually provided channel') + + dispatcher.enable() + self.assertTrue(dispatcher.enabled) + self.assertTrue(dispatcher.producer) + + # XXX test compat attribute + self.assertIs(dispatcher.publisher, dispatcher.producer) + prev, dispatcher.publisher = dispatcher.producer, 42 + try: + self.assertEqual(dispatcher.producer, 42) + finally: + dispatcher.producer = prev + finally: + channel.close() + connection.close() + self.assertTrue(created_channel.closed) + + +class test_EventReceiver(AppCase): + + def test_process(self): + + message = {'type': 'world-war'} + + got_event = [False] + + def my_handler(event): + got_event[0] = True + + connection = Mock() + connection.transport_cls = 'memory' + r = self.app.events.Receiver( + connection, + handlers={'world-war': my_handler}, + node_id='celery.tests', + ) + r._receive(message, object()) + self.assertTrue(got_event[0]) + + def test_catch_all_event(self): + + message = {'type': 'world-war'} + + got_event = [False] + + def my_handler(event): + got_event[0] = True + + connection = Mock() + connection.transport_cls = 'memory' + r = self.app.events.Receiver(connection, node_id='celery.tests') + r.handlers['*'] = my_handler + r._receive(message, object()) + self.assertTrue(got_event[0]) + + def test_itercapture(self): + connection = self.app.connection() + try: + r = self.app.events.Receiver(connection, node_id='celery.tests') + it = r.itercapture(timeout=0.0001, wakeup=False) + + with self.assertRaises(socket.timeout): + next(it) + + with self.assertRaises(socket.timeout): + r.capture(timeout=0.00001) + finally: + 
connection.close() + + def test_event_from_message_localize_disabled(self): + r = self.app.events.Receiver(Mock(), node_id='celery.tests') + r.adjust_clock = Mock() + ts_adjust = Mock() + + r.event_from_message( + {'type': 'worker-online', 'clock': 313}, + localize=False, + adjust_timestamp=ts_adjust, + ) + self.assertFalse(ts_adjust.called) + r.adjust_clock.assert_called_with(313) + + def test_itercapture_limit(self): + connection = self.app.connection() + channel = connection.channel() + try: + events_received = [0] + + def handler(event): + events_received[0] += 1 + + producer = self.app.events.Dispatcher( + connection, enabled=True, channel=channel, + ) + r = self.app.events.Receiver( + connection, + handlers={'*': handler}, + node_id='celery.tests', + ) + evs = ['ev1', 'ev2', 'ev3', 'ev4', 'ev5'] + for ev in evs: + producer.send(ev) + it = r.itercapture(limit=4, wakeup=True) + next(it) # skip consumer (see itercapture) + list(it) + self.assertEqual(events_received[0], 4) + finally: + channel.close() + connection.close() + + +class test_misc(AppCase): + + def test_State(self): + state = self.app.events.State() + self.assertDictEqual(dict(state.workers), {}) + + def test_default_dispatcher(self): + with self.app.events.default_dispatcher() as d: + self.assertTrue(d) + self.assertTrue(d.connection) diff --git a/celery/tests/events/test_snapshot.py b/celery/tests/events/test_snapshot.py new file mode 100644 index 0000000..f551751 --- /dev/null +++ b/celery/tests/events/test_snapshot.py @@ -0,0 +1,130 @@ +from __future__ import absolute_import + +from celery.events import Events +from celery.events.snapshot import Polaroid, evcam +from celery.tests.case import AppCase, patch, restore_logging + + +class TRef(object): + active = True + called = False + + def __call__(self): + self.called = True + + def cancel(self): + self.active = False + + +class MockTimer(object): + installed = [] + + def call_repeatedly(self, secs, fun, *args, **kwargs): + self.installed.append(fun) + return TRef() +timer = MockTimer() + + +class test_Polaroid(AppCase): + + def setup(self): + self.state = self.app.events.State() + + def test_constructor(self): + x = Polaroid(self.state, app=self.app) + self.assertIs(x.app, self.app) + self.assertIs(x.state, self.state) + self.assertTrue(x.freq) + self.assertTrue(x.cleanup_freq) + self.assertTrue(x.logger) + self.assertFalse(x.maxrate) + + def test_install_timers(self): + x = Polaroid(self.state, app=self.app) + x.timer = timer + x.__exit__() + x.__enter__() + self.assertIn(x.capture, MockTimer.installed) + self.assertIn(x.cleanup, MockTimer.installed) + self.assertTrue(x._tref.active) + self.assertTrue(x._ctref.active) + x.__exit__() + self.assertFalse(x._tref.active) + self.assertFalse(x._ctref.active) + self.assertTrue(x._tref.called) + self.assertFalse(x._ctref.called) + + def test_cleanup(self): + x = Polaroid(self.state, app=self.app) + cleanup_signal_sent = [False] + + def handler(**kwargs): + cleanup_signal_sent[0] = True + + x.cleanup_signal.connect(handler) + x.cleanup() + self.assertTrue(cleanup_signal_sent[0]) + + def test_shutter__capture(self): + x = Polaroid(self.state, app=self.app) + shutter_signal_sent = [False] + + def handler(**kwargs): + shutter_signal_sent[0] = True + + x.shutter_signal.connect(handler) + x.shutter() + self.assertTrue(shutter_signal_sent[0]) + + shutter_signal_sent[0] = False + x.capture() + self.assertTrue(shutter_signal_sent[0]) + + def test_shutter_maxrate(self): + x = Polaroid(self.state, app=self.app, maxrate='1/h') + 
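+        # at a maxrate of 1/h only the first of the repeated shutter calls
+        # below should actually fire the shutter signal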
shutter_signal_sent = [0] + + def handler(**kwargs): + shutter_signal_sent[0] += 1 + + x.shutter_signal.connect(handler) + for i in range(30): + x.shutter() + x.shutter() + x.shutter() + self.assertEqual(shutter_signal_sent[0], 1) + + +class test_evcam(AppCase): + + class MockReceiver(object): + raise_keyboard_interrupt = False + + def capture(self, **kwargs): + if self.__class__.raise_keyboard_interrupt: + raise KeyboardInterrupt() + + class MockEvents(Events): + + def Receiver(self, *args, **kwargs): + return test_evcam.MockReceiver() + + def setup(self): + self.app.events = self.MockEvents() + self.app.events.app = self.app + + def test_evcam(self): + with restore_logging(): + evcam(Polaroid, timer=timer, app=self.app) + evcam(Polaroid, timer=timer, loglevel='CRITICAL', app=self.app) + self.MockReceiver.raise_keyboard_interrupt = True + try: + with self.assertRaises(SystemExit): + evcam(Polaroid, timer=timer, app=self.app) + finally: + self.MockReceiver.raise_keyboard_interrupt = False + + @patch('celery.platforms.create_pidlock') + def test_evcam_pidfile(self, create_pidlock): + evcam(Polaroid, timer=timer, pidfile='/var/pid', app=self.app) + create_pidlock.assert_called_with('/var/pid') diff --git a/celery/tests/events/test_state.py b/celery/tests/events/test_state.py new file mode 100644 index 0000000..b7e35d7 --- /dev/null +++ b/celery/tests/events/test_state.py @@ -0,0 +1,581 @@ +from __future__ import absolute_import + +import pickle + +from decimal import Decimal +from random import shuffle +from time import time +from itertools import count + +from celery import states +from celery.events import Event +from celery.events.state import ( + State, + Worker, + Task, + HEARTBEAT_EXPIRE_WINDOW, + HEARTBEAT_DRIFT_MAX, +) +from celery.five import range +from celery.utils import uuid +from celery.tests.case import AppCase, Mock, patch + +try: + Decimal(2.6) +except TypeError: # pragma: no cover + # Py2.6: Must first convert float to str + _float_to_decimal = str +else: + _float_to_decimal = lambda f: f # noqa + + +class replay(object): + + def __init__(self, state): + self.state = state + self.rewind() + self.setup() + self.current_clock = 0 + + def setup(self): + pass + + def next_event(self): + ev = self.events[next(self.position)] + ev['local_received'] = ev['timestamp'] + try: + self.current_clock = ev['clock'] + except KeyError: + ev['clock'] = self.current_clock = self.current_clock + 1 + return ev + + def __iter__(self): + return self + + def __next__(self): + try: + self.state.event(self.next_event()) + except IndexError: + raise StopIteration() + next = __next__ + + def rewind(self): + self.position = count(0) + return self + + def play(self): + for _ in self: + pass + + +class ev_worker_online_offline(replay): + + def setup(self): + self.events = [ + Event('worker-online', hostname='utest1'), + Event('worker-offline', hostname='utest1'), + ] + + +class ev_worker_heartbeats(replay): + + def setup(self): + self.events = [ + Event('worker-heartbeat', hostname='utest1', + timestamp=time() - HEARTBEAT_EXPIRE_WINDOW * 2), + Event('worker-heartbeat', hostname='utest1'), + ] + + +class ev_task_states(replay): + + def setup(self): + tid = self.tid = uuid() + self.events = [ + Event('task-received', uuid=tid, name='task1', + args='(2, 2)', kwargs="{'foo': 'bar'}", + retries=0, eta=None, hostname='utest1'), + Event('task-started', uuid=tid, hostname='utest1'), + Event('task-revoked', uuid=tid, hostname='utest1'), + Event('task-retried', uuid=tid, exception="KeyError('bar')", + 
traceback='line 2 at main', hostname='utest1'), + Event('task-failed', uuid=tid, exception="KeyError('foo')", + traceback='line 1 at main', hostname='utest1'), + Event('task-succeeded', uuid=tid, result='4', + runtime=0.1234, hostname='utest1'), + ] + + +def QTEV(type, uuid, hostname, clock, name=None, timestamp=None): + """Quick task event.""" + return Event('task-{0}'.format(type), uuid=uuid, hostname=hostname, + clock=clock, name=name, timestamp=timestamp or time()) + + +class ev_logical_clock_ordering(replay): + + def __init__(self, state, offset=0, uids=None): + self.offset = offset or 0 + self.uids = self.setuids(uids) + super(ev_logical_clock_ordering, self).__init__(state) + + def setuids(self, uids): + uids = self.tA, self.tB, self.tC = uids or [uuid(), uuid(), uuid()] + return uids + + def setup(self): + offset = self.offset + tA, tB, tC = self.uids + self.events = [ + QTEV('received', tA, 'w1', name='tA', clock=offset + 1), + QTEV('received', tB, 'w2', name='tB', clock=offset + 1), + QTEV('started', tA, 'w1', name='tA', clock=offset + 3), + QTEV('received', tC, 'w2', name='tC', clock=offset + 3), + QTEV('started', tB, 'w2', name='tB', clock=offset + 5), + QTEV('retried', tA, 'w1', name='tA', clock=offset + 7), + QTEV('succeeded', tB, 'w2', name='tB', clock=offset + 9), + QTEV('started', tC, 'w2', name='tC', clock=offset + 10), + QTEV('received', tA, 'w3', name='tA', clock=offset + 13), + QTEV('succeded', tC, 'w2', name='tC', clock=offset + 12), + QTEV('started', tA, 'w3', name='tA', clock=offset + 14), + QTEV('succeeded', tA, 'w3', name='TA', clock=offset + 16), + ] + + def rewind_with_offset(self, offset, uids=None): + self.offset = offset + self.uids = self.setuids(uids or self.uids) + self.setup() + self.rewind() + + +class ev_snapshot(replay): + + def setup(self): + self.events = [ + Event('worker-online', hostname='utest1'), + Event('worker-online', hostname='utest2'), + Event('worker-online', hostname='utest3'), + ] + for i in range(20): + worker = not i % 2 and 'utest2' or 'utest1' + type = not i % 2 and 'task2' or 'task1' + self.events.append(Event('task-received', name=type, + uuid=uuid(), hostname=worker)) + + +class test_Worker(AppCase): + + def test_equality(self): + self.assertEqual(Worker(hostname='foo').hostname, 'foo') + self.assertEqual( + Worker(hostname='foo'), Worker(hostname='foo'), + ) + self.assertNotEqual( + Worker(hostname='foo'), Worker(hostname='bar'), + ) + self.assertEqual( + hash(Worker(hostname='foo')), hash(Worker(hostname='foo')), + ) + self.assertNotEqual( + hash(Worker(hostname='foo')), hash(Worker(hostname='bar')), + ) + + def test_compatible_with_Decimal(self): + w = Worker('george@vandelay.com') + timestamp, local_received = Decimal(_float_to_decimal(time())), time() + w.event('worker-online', timestamp, local_received, fields={ + 'hostname': 'george@vandelay.com', + 'timestamp': timestamp, + 'local_received': local_received, + 'freq': Decimal(_float_to_decimal(5.6335431)), + }) + self.assertTrue(w.alive) + + def test_survives_missing_timestamp(self): + worker = Worker(hostname='foo') + worker.event('heartbeat') + self.assertEqual(worker.heartbeats, []) + + def test_repr(self): + self.assertTrue(repr(Worker(hostname='foo'))) + + def test_drift_warning(self): + worker = Worker(hostname='foo') + with patch('celery.events.state.warn') as warn: + worker.event(None, time() + (HEARTBEAT_DRIFT_MAX * 2), time()) + self.assertTrue(warn.called) + self.assertIn('Substantial drift', warn.call_args[0][0]) + + def test_updates_heartbeat(self): + worker = 
Worker(hostname='foo') + worker.event(None, time(), time()) + self.assertEqual(len(worker.heartbeats), 1) + h1 = worker.heartbeats[0] + worker.event(None, time(), time() - 10) + self.assertEqual(len(worker.heartbeats), 2) + self.assertEqual(worker.heartbeats[-1], h1) + + +class test_Task(AppCase): + + def test_equality(self): + self.assertEqual(Task(uuid='foo').uuid, 'foo') + self.assertEqual( + Task(uuid='foo'), Task(uuid='foo'), + ) + self.assertNotEqual( + Task(uuid='foo'), Task(uuid='bar'), + ) + self.assertEqual( + hash(Task(uuid='foo')), hash(Task(uuid='foo')), + ) + self.assertNotEqual( + hash(Task(uuid='foo')), hash(Task(uuid='bar')), + ) + + def test_info(self): + task = Task(uuid='abcdefg', + name='tasks.add', + args='(2, 2)', + kwargs='{}', + retries=2, + result=42, + eta=1, + runtime=0.0001, + expires=1, + foo=None, + exception=1, + received=time() - 10, + started=time() - 8, + exchange='celery', + routing_key='celery', + succeeded=time()) + self.assertEqual(sorted(list(task._info_fields)), + sorted(task.info().keys())) + + self.assertEqual(sorted(list(task._info_fields + ('received', ))), + sorted(task.info(extra=('received', )))) + + self.assertEqual(sorted(['args', 'kwargs']), + sorted(task.info(['args', 'kwargs']).keys())) + self.assertFalse(list(task.info('foo'))) + + def test_ready(self): + task = Task(uuid='abcdefg', + name='tasks.add') + task.event('received', time(), time()) + self.assertFalse(task.ready) + task.event('succeeded', time(), time()) + self.assertTrue(task.ready) + + def test_sent(self): + task = Task(uuid='abcdefg', + name='tasks.add') + task.event('sent', time(), time()) + self.assertEqual(task.state, states.PENDING) + + def test_merge(self): + task = Task() + task.event('failed', time(), time()) + task.event('started', time(), time()) + task.event('received', time(), time(), { + 'name': 'tasks.add', 'args': (2, 2), + }) + self.assertEqual(task.state, states.FAILURE) + self.assertEqual(task.name, 'tasks.add') + self.assertTupleEqual(task.args, (2, 2)) + task.event('retried', time(), time()) + self.assertEqual(task.state, states.RETRY) + + def test_repr(self): + self.assertTrue(repr(Task(uuid='xxx', name='tasks.add'))) + + +class test_State(AppCase): + + def test_repr(self): + self.assertTrue(repr(State())) + + def test_pickleable(self): + self.assertTrue(pickle.loads(pickle.dumps(State()))) + + def test_task_logical_clock_ordering(self): + state = State() + r = ev_logical_clock_ordering(state) + tA, tB, tC = r.uids + r.play() + now = list(state.tasks_by_time()) + self.assertEqual(now[0][0], tA) + self.assertEqual(now[1][0], tC) + self.assertEqual(now[2][0], tB) + for _ in range(1000): + shuffle(r.uids) + tA, tB, tC = r.uids + r.rewind_with_offset(r.current_clock + 1, r.uids) + r.play() + now = list(state.tasks_by_time()) + self.assertEqual(now[0][0], tA) + self.assertEqual(now[1][0], tC) + self.assertEqual(now[2][0], tB) + + def test_worker_online_offline(self): + r = ev_worker_online_offline(State()) + next(r) + self.assertTrue(r.state.alive_workers()) + self.assertTrue(r.state.workers['utest1'].alive) + r.play() + self.assertFalse(r.state.alive_workers()) + self.assertFalse(r.state.workers['utest1'].alive) + + def test_itertasks(self): + s = State() + s.tasks = {'a': 'a', 'b': 'b', 'c': 'c', 'd': 'd'} + self.assertEqual(len(list(s.itertasks(limit=2))), 2) + + def test_worker_heartbeat_expire(self): + r = ev_worker_heartbeats(State()) + next(r) + self.assertFalse(r.state.alive_workers()) + self.assertFalse(r.state.workers['utest1'].alive) + r.play() + 
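+        # the second heartbeat is current, so the worker is alive again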
self.assertTrue(r.state.alive_workers()) + self.assertTrue(r.state.workers['utest1'].alive) + + def test_task_states(self): + r = ev_task_states(State()) + + # RECEIVED + next(r) + self.assertTrue(r.tid in r.state.tasks) + task = r.state.tasks[r.tid] + self.assertEqual(task.state, states.RECEIVED) + self.assertTrue(task.received) + self.assertEqual(task.timestamp, task.received) + self.assertEqual(task.worker.hostname, 'utest1') + + # STARTED + next(r) + self.assertTrue(r.state.workers['utest1'].alive, + 'any task event adds worker heartbeat') + self.assertEqual(task.state, states.STARTED) + self.assertTrue(task.started) + self.assertEqual(task.timestamp, task.started) + self.assertEqual(task.worker.hostname, 'utest1') + + # REVOKED + next(r) + self.assertEqual(task.state, states.REVOKED) + self.assertTrue(task.revoked) + self.assertEqual(task.timestamp, task.revoked) + self.assertEqual(task.worker.hostname, 'utest1') + + # RETRY + next(r) + self.assertEqual(task.state, states.RETRY) + self.assertTrue(task.retried) + self.assertEqual(task.timestamp, task.retried) + self.assertEqual(task.worker.hostname, 'utest1') + self.assertEqual(task.exception, "KeyError('bar')") + self.assertEqual(task.traceback, 'line 2 at main') + + # FAILURE + next(r) + self.assertEqual(task.state, states.FAILURE) + self.assertTrue(task.failed) + self.assertEqual(task.timestamp, task.failed) + self.assertEqual(task.worker.hostname, 'utest1') + self.assertEqual(task.exception, "KeyError('foo')") + self.assertEqual(task.traceback, 'line 1 at main') + + # SUCCESS + next(r) + self.assertEqual(task.state, states.SUCCESS) + self.assertTrue(task.succeeded) + self.assertEqual(task.timestamp, task.succeeded) + self.assertEqual(task.worker.hostname, 'utest1') + self.assertEqual(task.result, '4') + self.assertEqual(task.runtime, 0.1234) + + def assertStateEmpty(self, state): + self.assertFalse(state.tasks) + self.assertFalse(state.workers) + self.assertFalse(state.event_count) + self.assertFalse(state.task_count) + + def assertState(self, state): + self.assertTrue(state.tasks) + self.assertTrue(state.workers) + self.assertTrue(state.event_count) + self.assertTrue(state.task_count) + + def test_freeze_while(self): + s = State() + r = ev_snapshot(s) + r.play() + + def work(): + pass + + s.freeze_while(work, clear_after=True) + self.assertFalse(s.event_count) + + s2 = State() + r = ev_snapshot(s2) + r.play() + s2.freeze_while(work, clear_after=False) + self.assertTrue(s2.event_count) + + def test_clear_tasks(self): + s = State() + r = ev_snapshot(s) + r.play() + self.assertTrue(s.tasks) + s.clear_tasks(ready=False) + self.assertFalse(s.tasks) + + def test_clear(self): + r = ev_snapshot(State()) + r.play() + self.assertTrue(r.state.event_count) + self.assertTrue(r.state.workers) + self.assertTrue(r.state.tasks) + self.assertTrue(r.state.task_count) + + r.state.clear() + self.assertFalse(r.state.event_count) + self.assertFalse(r.state.workers) + self.assertTrue(r.state.tasks) + self.assertFalse(r.state.task_count) + + r.state.clear(False) + self.assertFalse(r.state.tasks) + + def test_task_types(self): + r = ev_snapshot(State()) + r.play() + self.assertEqual(sorted(r.state.task_types()), ['task1', 'task2']) + + def test_tasks_by_timestamp(self): + r = ev_snapshot(State()) + r.play() + self.assertEqual(len(list(r.state.tasks_by_timestamp())), 20) + + def test_tasks_by_type(self): + r = ev_snapshot(State()) + r.play() + self.assertEqual(len(list(r.state.tasks_by_type('task1'))), 10) + 
self.assertEqual(len(list(r.state.tasks_by_type('task2'))), 10) + + def test_alive_workers(self): + r = ev_snapshot(State()) + r.play() + self.assertEqual(len(r.state.alive_workers()), 3) + + def test_tasks_by_worker(self): + r = ev_snapshot(State()) + r.play() + self.assertEqual(len(list(r.state.tasks_by_worker('utest1'))), 10) + self.assertEqual(len(list(r.state.tasks_by_worker('utest2'))), 10) + + def test_survives_unknown_worker_event(self): + s = State() + s.event({ + 'type': 'worker-unknown-event-xxx', + 'foo': 'bar', + }) + s.event({ + 'type': 'worker-unknown-event-xxx', + 'hostname': 'xxx', + 'foo': 'bar', + }) + + def test_survives_unknown_worker_leaving(self): + s = State(on_node_leave=Mock(name='on_node_leave')) + (worker, created), subject = s.event({ + 'type': 'worker-offline', + 'hostname': 'unknown@vandelay.com', + 'timestamp': time(), + 'local_received': time(), + 'clock': 301030134894833, + }) + self.assertEqual(worker, Worker('unknown@vandelay.com')) + self.assertFalse(created) + self.assertEqual(subject, 'offline') + self.assertNotIn('unknown@vandelay.com', s.workers) + s.on_node_leave.assert_called_with(worker) + + def test_on_node_join_callback(self): + s = State(on_node_join=Mock(name='on_node_join')) + (worker, created), subject = s.event({ + 'type': 'worker-online', + 'hostname': 'george@vandelay.com', + 'timestamp': time(), + 'local_received': time(), + 'clock': 34314, + }) + self.assertTrue(worker) + self.assertTrue(created) + self.assertEqual(subject, 'online') + self.assertIn('george@vandelay.com', s.workers) + s.on_node_join.assert_called_with(worker) + + def test_survives_unknown_task_event(self): + s = State() + s.event( + { + 'type': 'task-unknown-event-xxx', + 'foo': 'bar', + 'uuid': 'x', + 'hostname': 'y', + 'timestamp': time(), + 'local_received': time(), + 'clock': 0, + }, + ) + + def test_limits_maxtasks(self): + s = State(max_tasks_in_memory=1) + s.heap_multiplier = 2 + s.event({ + 'type': 'task-unknown-event-xxx', + 'foo': 'bar', + 'uuid': 'x', + 'hostname': 'y', + 'clock': 3, + 'timestamp': time(), + 'local_received': time(), + }) + s.event({ + 'type': 'task-unknown-event-xxx', + 'foo': 'bar', + 'uuid': 'y', + 'hostname': 'y', + 'clock': 4, + 'timestamp': time(), + 'local_received': time(), + }) + s.event({ + 'type': 'task-unknown-event-xxx', + 'foo': 'bar', + 'uuid': 'z', + 'hostname': 'y', + 'clock': 5, + 'timestamp': time(), + 'local_received': time(), + }) + self.assertEqual(len(s._taskheap), 2) + self.assertEqual(s._taskheap[0].clock, 4) + self.assertEqual(s._taskheap[1].clock, 5) + + s._taskheap.append(s._taskheap[0]) + self.assertTrue(list(s.tasks_by_time())) + + def test_callback(self): + scratch = {} + + def callback(state, event): + scratch['recv'] = True + + s = State(callback=callback) + s.event({'type': 'worker-online'}) + self.assertTrue(scratch.get('recv')) diff --git a/celery/tests/fixups/__init__.py b/celery/tests/fixups/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/tests/fixups/test_django.py b/celery/tests/fixups/test_django.py new file mode 100644 index 0000000..1d4ec5c --- /dev/null +++ b/celery/tests/fixups/test_django.py @@ -0,0 +1,300 @@ +from __future__ import absolute_import + +import os + +from contextlib import contextmanager + +from celery.fixups.django import ( + _maybe_close_fd, + fixup, + DjangoFixup, + DjangoWorkerFixup, +) + +from celery.tests.case import ( + AppCase, Mock, patch, patch_many, patch_modules, mask_modules, +) + + +class FixupCase(AppCase): + Fixup = None + + 
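+    # Subclasses set ``Fixup`` to the fixup class under test; fixup_context
+    # builds it with the Django-specific imports patched out, so the fixups
+    # can be exercised without Django installed.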
@contextmanager + def fixup_context(self, app): + with patch('celery.fixups.django.DjangoWorkerFixup.validate_models'): + with patch('celery.fixups.django.symbol_by_name') as symbyname: + with patch('celery.fixups.django.import_module') as impmod: + f = self.Fixup(app) + yield f, impmod, symbyname + + +class test_DjangoFixup(FixupCase): + Fixup = DjangoFixup + + def test_fixup(self): + with patch('celery.fixups.django.DjangoFixup') as Fixup: + with patch.dict(os.environ, DJANGO_SETTINGS_MODULE=''): + fixup(self.app) + self.assertFalse(Fixup.called) + with patch.dict(os.environ, DJANGO_SETTINGS_MODULE='settings'): + with mask_modules('django'): + with self.assertWarnsRegex(UserWarning, 'but Django is'): + fixup(self.app) + self.assertFalse(Fixup.called) + with patch_modules('django'): + fixup(self.app) + self.assertTrue(Fixup.called) + + def test_maybe_close_fd(self): + with patch('os.close'): + _maybe_close_fd(Mock()) + _maybe_close_fd(object()) + + def test_init(self): + with self.fixup_context(self.app) as (f, importmod, sym): + self.assertTrue(f) + + def se(name): + if name == 'django.utils.timezone:now': + raise ImportError() + return Mock() + sym.side_effect = se + self.assertTrue(self.Fixup(self.app)._now) + + def test_install(self): + self.app.loader = Mock() + with self.fixup_context(self.app) as (f, _, _): + with patch_many('os.getcwd', 'sys.path', + 'celery.fixups.django.signals') as (cw, p, sigs): + cw.return_value = '/opt/vandelay' + f.install() + sigs.worker_init.connect.assert_called_with(f.on_worker_init) + self.assertEqual(self.app.loader.now, f.now) + self.assertEqual(self.app.loader.mail_admins, f.mail_admins) + p.append.assert_called_with('/opt/vandelay') + + def test_now(self): + with self.fixup_context(self.app) as (f, _, _): + self.assertTrue(f.now(utc=True)) + self.assertFalse(f._now.called) + self.assertTrue(f.now(utc=False)) + self.assertTrue(f._now.called) + + def test_mail_admins(self): + with self.fixup_context(self.app) as (f, _, _): + f.mail_admins('sub', 'body', True) + f._mail_admins.assert_called_with( + 'sub', 'body', fail_silently=True, + ) + + def test_on_worker_init(self): + with self.fixup_context(self.app) as (f, _, _): + with patch('celery.fixups.django.DjangoWorkerFixup') as DWF: + f.on_worker_init() + DWF.assert_called_with(f.app) + DWF.return_value.install.assert_called_with() + self.assertIs( + f._worker_fixup, DWF.return_value.install.return_value, + ) + + +class test_DjangoWorkerFixup(FixupCase): + Fixup = DjangoWorkerFixup + + def test_init(self): + with self.fixup_context(self.app) as (f, importmod, sym): + self.assertTrue(f) + + def se(name): + if name == 'django.db:close_old_connections': + raise ImportError() + return Mock() + sym.side_effect = se + self.assertIsNone(self.Fixup(self.app)._close_old_connections) + + def test_install(self): + self.app.conf = {'CELERY_DB_REUSE_MAX': None} + self.app.loader = Mock() + with self.fixup_context(self.app) as (f, _, _): + with patch_many('celery.fixups.django.signals') as (sigs, ): + f.install() + sigs.beat_embedded_init.connect.assert_called_with( + f.close_database, + ) + sigs.worker_ready.connect.assert_called_with(f.on_worker_ready) + sigs.task_prerun.connect.assert_called_with(f.on_task_prerun) + sigs.task_postrun.connect.assert_called_with(f.on_task_postrun) + sigs.worker_process_init.connect.assert_called_with( + f.on_worker_process_init, + ) + + def test_on_worker_process_init(self): + with self.fixup_context(self.app) as (f, _, _): + with patch('celery.fixups.django._maybe_close_fd') as 
mcf: + _all = f._db.connections.all = Mock() + conns = _all.return_value = [ + Mock(), Mock(), + ] + conns[0].connection = None + with patch.object(f, 'close_cache'): + with patch.object(f, '_close_database'): + f.on_worker_process_init() + mcf.assert_called_with(conns[1].connection) + f.close_cache.assert_called_with() + f._close_database.assert_called_with() + + mcf.reset_mock() + _all.side_effect = AttributeError() + f.on_worker_process_init() + mcf.assert_called_with(f._db.connection.connection) + f._db.connection = None + f.on_worker_process_init() + + def test_on_task_prerun(self): + task = Mock() + with self.fixup_context(self.app) as (f, _, _): + task.request.is_eager = False + with patch.object(f, 'close_database'): + f.on_task_prerun(task) + f.close_database.assert_called_with() + + task.request.is_eager = True + with patch.object(f, 'close_database'): + f.on_task_prerun(task) + self.assertFalse(f.close_database.called) + + def test_on_task_postrun(self): + task = Mock() + with self.fixup_context(self.app) as (f, _, _): + with patch.object(f, 'close_cache'): + task.request.is_eager = False + with patch.object(f, 'close_database'): + f.on_task_postrun(task) + self.assertTrue(f.close_database.called) + self.assertTrue(f.close_cache.called) + + # when a task is eager, do not close connections + with patch.object(f, 'close_cache'): + task.request.is_eager = True + with patch.object(f, 'close_database'): + f.on_task_postrun(task) + self.assertFalse(f.close_database.called) + self.assertFalse(f.close_cache.called) + + def test_close_database(self): + with self.fixup_context(self.app) as (f, _, _): + f._close_old_connections = Mock() + f.close_database() + f._close_old_connections.assert_called_with() + f._close_old_connections = None + with patch.object(f, '_close_database') as _close: + f.db_reuse_max = None + f.close_database() + _close.assert_called_with() + _close.reset_mock() + + f.db_reuse_max = 10 + f._db_recycles = 3 + f.close_database() + self.assertFalse(_close.called) + self.assertEqual(f._db_recycles, 4) + _close.reset_mock() + + f._db_recycles = 20 + f.close_database() + _close.assert_called_with() + self.assertEqual(f._db_recycles, 1) + + def test__close_database(self): + with self.fixup_context(self.app) as (f, _, _): + conns = f._db.connections = [Mock(), Mock(), Mock()] + conns[1].close.side_effect = KeyError('already closed') + f.database_errors = (KeyError, ) + + f._close_database() + conns[0].close.assert_called_with() + conns[1].close.assert_called_with() + conns[2].close.assert_called_with() + + conns[1].close.side_effect = KeyError('omg') + with self.assertRaises(KeyError): + f._close_database() + + class Object(object): + pass + o = Object() + o.close_connection = Mock() + f._db = o + f._close_database() + o.close_connection.assert_called_with() + + def test_close_cache(self): + with self.fixup_context(self.app) as (f, _, _): + f.close_cache() + f._cache.cache.close.assert_called_with() + f._cache.cache.close.side_effect = TypeError() + f.close_cache() + + def test_on_worker_ready(self): + with self.fixup_context(self.app) as (f, _, _): + f._settings.DEBUG = False + f.on_worker_ready() + with self.assertWarnsRegex(UserWarning, r'leads to a memory leak'): + f._settings.DEBUG = True + f.on_worker_ready() + + def test_mysql_errors(self): + with patch_modules('MySQLdb'): + import MySQLdb as mod + mod.DatabaseError = Mock() + mod.InterfaceError = Mock() + mod.OperationalError = Mock() + with self.fixup_context(self.app) as (f, _, _): + 
self.assertIn(mod.DatabaseError, f.database_errors) + self.assertIn(mod.InterfaceError, f.database_errors) + self.assertIn(mod.OperationalError, f.database_errors) + with mask_modules('MySQLdb'): + with self.fixup_context(self.app): + pass + + def test_pg_errors(self): + with patch_modules('psycopg2'): + import psycopg2 as mod + mod.DatabaseError = Mock() + mod.InterfaceError = Mock() + mod.OperationalError = Mock() + with self.fixup_context(self.app) as (f, _, _): + self.assertIn(mod.DatabaseError, f.database_errors) + self.assertIn(mod.InterfaceError, f.database_errors) + self.assertIn(mod.OperationalError, f.database_errors) + with mask_modules('psycopg2'): + with self.fixup_context(self.app): + pass + + def test_sqlite_errors(self): + with patch_modules('sqlite3'): + import sqlite3 as mod + mod.DatabaseError = Mock() + mod.InterfaceError = Mock() + mod.OperationalError = Mock() + with self.fixup_context(self.app) as (f, _, _): + self.assertIn(mod.DatabaseError, f.database_errors) + self.assertIn(mod.InterfaceError, f.database_errors) + self.assertIn(mod.OperationalError, f.database_errors) + with mask_modules('sqlite3'): + with self.fixup_context(self.app): + pass + + def test_oracle_errors(self): + with patch_modules('cx_Oracle'): + import cx_Oracle as mod + mod.DatabaseError = Mock() + mod.InterfaceError = Mock() + mod.OperationalError = Mock() + with self.fixup_context(self.app) as (f, _, _): + self.assertIn(mod.DatabaseError, f.database_errors) + self.assertIn(mod.InterfaceError, f.database_errors) + self.assertIn(mod.OperationalError, f.database_errors) + with mask_modules('cx_Oracle'): + with self.fixup_context(self.app): + pass diff --git a/celery/tests/functional/__init__.py b/celery/tests/functional/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/tests/functional/case.py b/celery/tests/functional/case.py new file mode 100644 index 0000000..298c684 --- /dev/null +++ b/celery/tests/functional/case.py @@ -0,0 +1,178 @@ +from __future__ import absolute_import + +import atexit +import logging +import os +import signal +import socket +import sys +import traceback + +from itertools import count +from time import time + +from celery import current_app +from celery.exceptions import TimeoutError +from celery.app.control import flatten_reply +from celery.utils.imports import qualname + +from celery.tests.case import Case + +HOSTNAME = socket.gethostname() + + +def say(msg): + sys.stderr.write('%s\n' % msg) + + +def try_while(fun, reason='Timed out', timeout=10, interval=0.5): + time_start = time() + for iterations in count(0): + if time() - time_start >= timeout: + raise TimeoutError() + ret = fun() + if ret: + return ret + + +class Worker(object): + started = False + worker_ids = count(1) + _shutdown_called = False + + def __init__(self, hostname, loglevel='error', app=None): + self.hostname = hostname + self.loglevel = loglevel + self.app = app or current_app._get_current_object() + + def start(self): + if not self.started: + self._fork_and_exec() + self.started = True + + def _fork_and_exec(self): + pid = os.fork() + if pid == 0: + self.app.worker_main(['worker', '--loglevel=INFO', + '-n', self.hostname, + '-P', 'solo']) + os._exit(0) + self.pid = pid + + def ping(self, *args, **kwargs): + return self.app.control.ping(*args, **kwargs) + + def is_alive(self, timeout=1): + r = self.ping(destination=[self.hostname], timeout=timeout) + return self.hostname in flatten_reply(r) + + def wait_until_started(self, timeout=10, interval=0.5): + try_while( + lambda: 
self.is_alive(interval), + "Worker won't start (after %s secs.)" % timeout, + interval=interval, timeout=timeout, + ) + say('--WORKER %s IS ONLINE--' % self.hostname) + + def ensure_shutdown(self, timeout=10, interval=0.5): + os.kill(self.pid, signal.SIGTERM) + try_while( + lambda: not self.is_alive(interval), + "Worker won't shutdown (after %s secs.)" % timeout, + timeout=10, interval=0.5, + ) + say('--WORKER %s IS SHUTDOWN--' % self.hostname) + self._shutdown_called = True + + def ensure_started(self): + self.start() + self.wait_until_started() + + @classmethod + def managed(cls, hostname=None, caller=None): + hostname = hostname or socket.gethostname() + if caller: + hostname = '.'.join([qualname(caller), hostname]) + else: + hostname += str(next(cls.worker_ids)) + worker = cls(hostname) + worker.ensure_started() + stack = traceback.format_stack() + + @atexit.register + def _ensure_shutdown_once(): + if not worker._shutdown_called: + say('-- Found worker not stopped at shutdown: %s\n%s' % ( + worker.hostname, + '\n'.join(stack))) + worker.ensure_shutdown() + + return worker + + +class WorkerCase(Case): + hostname = HOSTNAME + worker = None + + @classmethod + def setUpClass(cls): + logging.getLogger('amqp').setLevel(logging.ERROR) + cls.worker = Worker.managed(cls.hostname, caller=cls) + + @classmethod + def tearDownClass(cls): + cls.worker.ensure_shutdown() + + def assertWorkerAlive(self, timeout=1): + self.assertTrue(self.worker.is_alive) + + def inspect(self, timeout=1): + return self.app.control.inspect([self.worker.hostname], + timeout=timeout) + + def my_response(self, response): + return flatten_reply(response)[self.worker.hostname] + + def is_accepted(self, task_id, interval=0.5): + active = self.inspect(timeout=interval).active() + if active: + for task in active[self.worker.hostname]: + if task['id'] == task_id: + return True + return False + + def is_reserved(self, task_id, interval=0.5): + reserved = self.inspect(timeout=interval).reserved() + if reserved: + for task in reserved[self.worker.hostname]: + if task['id'] == task_id: + return True + return False + + def is_scheduled(self, task_id, interval=0.5): + schedule = self.inspect(timeout=interval).scheduled() + if schedule: + for item in schedule[self.worker.hostname]: + if item['request']['id'] == task_id: + return True + return False + + def is_received(self, task_id, interval=0.5): + return (self.is_reserved(task_id, interval) or + self.is_scheduled(task_id, interval) or + self.is_accepted(task_id, interval)) + + def ensure_accepted(self, task_id, interval=0.5, timeout=10): + return try_while(lambda: self.is_accepted(task_id, interval), + 'Task not accepted within timeout', + interval=0.5, timeout=10) + + def ensure_received(self, task_id, interval=0.5, timeout=10): + return try_while(lambda: self.is_received(task_id, interval), + 'Task not received within timeout', + interval=0.5, timeout=10) + + def ensure_scheduled(self, task_id, interval=0.5, timeout=10): + return try_while(lambda: self.is_scheduled(task_id, interval), + 'Task not scheduled within timeout', + interval=0.5, timeout=10) diff --git a/celery/tests/functional/tasks.py b/celery/tests/functional/tasks.py new file mode 100644 index 0000000..85479b4 --- /dev/null +++ b/celery/tests/functional/tasks.py @@ -0,0 +1,24 @@ +from __future__ import absolute_import + +import time + +from celery import task, signature + + +@task() +def add(x, y): + return x + y + + +@task() +def add_cb(x, y, callback=None): + result = x + y + if callback: + return 
signature(callback).apply_async(result) + return result + + +@task() +def sleeptask(i): + time.sleep(i) + return i diff --git a/celery/tests/security/__init__.py b/celery/tests/security/__init__.py new file mode 100644 index 0000000..50b7f4c --- /dev/null +++ b/celery/tests/security/__init__.py @@ -0,0 +1,68 @@ +from __future__ import absolute_import +""" +Keys and certificates for tests (KEY1 is a private key of CERT1, etc.) + +Generated with `extra/security/get-cert.sh` + +""" +KEY1 = """-----BEGIN RSA PRIVATE KEY----- +MIICXQIBAAKBgQC9Twh0V5q/R1Q8N+Y+CNM4lj9AXeZL0gYowoK1ht2ZLCDU9vN5 +dhV0x3sqaXLjQNeCGd6b2vTbFGdF2E45//IWz6/BdPFWaPm0rtYbcxZHqXDZScRp +vFDLHhMysdqQWHxXVxpqIXXo4B7bnfnGvXhYwYITeEyQylV/rnH53mdV8wIDAQAB +AoGBAKUJN4elr+S9nHP7D6BZNTsJ0Q6eTd0ftfrmx+jVMG8Oh3jh6ZSkG0R5e6iX +0W7I4pgrUWRyWDB98yJy1o+90CAN/D80o8SbmW/zfA2WLBteOujMfCEjNrc/Nodf +6MZ0QQ6PnPH6pp94i3kNmFD8Mlzm+ODrUjPF0dCNf474qeKhAkEA7SXj5cQPyQXM +s15oGX5eb6VOk96eAPtEC72cLSh6o+VYmXyGroV1A2JPm6IzH87mTqjWXG229hjt +XVvDbdY2uQJBAMxblWFaWJhhU6Y1euazaBl/OyLYlqNz4LZ0RzCulEoV/gMGYU32 +PbilD5fpFsyhp5oCxnWNEsUFovYMKjKM3AsCQQCIlOcBoP76ZxWzRK8t56MaKBnu +fiuAIzbYkDbPp12i4Wc61wZ2ozR2Y3u4Bh3tturb6M+04hea+1ZSC5StwM85AkAp +UPLYpe13kWXaGsHoVqlbTk/kcamzDkCGYufpvcIZYGzkq6uMmZZM+II4klWbtasv +BhSdu5Hp54PU/wyg/72VAkBy1/oM3/QJ35Vb6TByHBLFR4nOuORoRclmxcoCPva9 +xqkQQn+UgBtOemRXpFCuKaoXonA3nLeB54SWcC6YUOcR +-----END RSA PRIVATE KEY-----""" + +KEY2 = """-----BEGIN RSA PRIVATE KEY----- +MIICXQIBAAKBgQDH22L8b9AmST9ABDmQTQ2DWMdDmK5YXZt4AIY81IcsTQ/ccM0C +fwXEP9tdkYwtcxMCWdASwY5pfMy9vFp0hyrRQMSNfuoxAgONuNWPyQoIvY3ZXRe6 +rS+hb/LN4+vdjX+oxmYiQ2HmSB9rh2bepE6Cw+RLJr5sXXq+xZJ+BLt5tQIDAQAB +AoGBAMGBO0Arip/nP6Rd8tYypKjN5nEefX/1cjgoWdC//fj4zCil1vlZv12abm0U +JWNEDd2y0/G1Eow0V5BFtFcrIFowU44LZEiSf7sKXlNHRHlbZmDgNXFZOt7nVbHn +6SN+oCYjaPjji8idYeb3VQXPtqMoMn73MuyxD3k3tWmVLonpAkEA6hsu62qhUk5k +Nt88UZOauU1YizxsWvT0bHioaceE4TEsbO3NZs7dmdJIcRFcU787lANaaIq7Rw26 +qcumME9XhwJBANqMOzsYQ6BX54UzS6x99Jjlq9MEbTCbAEZr/yjopb9f617SwfuE +AEKnIq3HL6/Tnhv3V8Zy3wYHgDoGNeTVe+MCQQDi/nyeNAQ8RFqTgh2Ak/jAmCi0 +yV/fSgj+bHgQKS/FEuMas/IoL4lbrzQivkyhv5lLSX0ORQaWPM+z+A0qZqRdAkBh +XE+Wx/x4ljCh+nQf6AzrgIXHgBVUrfi1Zq9Jfjs4wnaMy793WRr0lpiwaigoYFHz +i4Ei+1G30eeh8dpYk3KZAkB0ucTOsQynDlL5rLGYZ+IcfSfH3w2l5EszY47kKQG9 +Fxeq/HOp9JYw4gRu6Ycvqu57KHwpHhR0FCXRBxuYcJ5V +-----END RSA PRIVATE KEY-----""" + +CERT1 = """-----BEGIN CERTIFICATE----- +MIICVzCCAcACCQC72PP7b7H9BTANBgkqhkiG9w0BAQUFADBwMQswCQYDVQQGEwJV +UzELMAkGA1UECBMCQ0ExCzAJBgNVBAcTAlNGMQ8wDQYDVQQKEwZDZWxlcnkxDzAN +BgNVBAMTBkNFbGVyeTElMCMGCSqGSIb3DQEJARYWY2VydEBjZWxlcnlwcm9qZWN0 +Lm9yZzAeFw0xMzA3MjQxMjExMTRaFw0xNDA3MjQxMjExMTRaMHAxCzAJBgNVBAYT +AlVTMQswCQYDVQQIEwJDQTELMAkGA1UEBxMCU0YxDzANBgNVBAoTBkNlbGVyeTEP +MA0GA1UEAxMGQ0VsZXJ5MSUwIwYJKoZIhvcNAQkBFhZjZXJ0QGNlbGVyeXByb2pl +Y3Qub3JnMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC9Twh0V5q/R1Q8N+Y+ +CNM4lj9AXeZL0gYowoK1ht2ZLCDU9vN5dhV0x3sqaXLjQNeCGd6b2vTbFGdF2E45 +//IWz6/BdPFWaPm0rtYbcxZHqXDZScRpvFDLHhMysdqQWHxXVxpqIXXo4B7bnfnG +vXhYwYITeEyQylV/rnH53mdV8wIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAKA4tD3J +94tsnQxFxHP7Frt7IvGMH+3wMqOiXFgYxPJX2tyaPvOLJ/7ERE4MkrvZO7IRC0iA +yKBe0pucdrTgsJoDV8juahuyjXOjvU14+q7Wv7pj7zqddVavzK8STLX4/FMIDnbK +aMGJl7wyj6V2yy6ANSbmy0uQjHikI6DrZEoK +-----END CERTIFICATE-----""" + +CERT2 = """-----BEGIN CERTIFICATE----- +MIICATCCAWoCCQCV/9A2ZBM37TANBgkqhkiG9w0BAQUFADBFMQswCQYDVQQGEwJB +VTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0 +cyBQdHkgTHRkMB4XDTExMDcxOTA5MDkwMloXDTEyMDcxODA5MDkwMlowRTELMAkG +A1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0 
+IFdpZGdpdHMgUHR5IEx0ZDCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAx9ti +/G/QJkk/QAQ5kE0Ng1jHQ5iuWF2beACGPNSHLE0P3HDNAn8FxD/bXZGMLXMTAlnQ +EsGOaXzMvbxadIcq0UDEjX7qMQIDjbjVj8kKCL2N2V0Xuq0voW/yzePr3Y1/qMZm +IkNh5kgfa4dm3qROgsPkSya+bF16vsWSfgS7ebUCAwEAATANBgkqhkiG9w0BAQUF +AAOBgQBzaZ5vBkzksPhnWb2oobuy6Ne/LMEtdQ//qeVY4sKl2tOJUCSdWRen9fqP +e+zYdEdkFCd8rp568Eiwkq/553uy4rlE927/AEqs/+KGYmAtibk/9vmi+/+iZXyS +WWZybzzDZFncq1/N1C3Y/hrCBNDFO4TsnTLAhWtZ4c0vDAiacw== +-----END CERTIFICATE-----""" diff --git a/celery/tests/security/case.py b/celery/tests/security/case.py new file mode 100644 index 0000000..ba421a9 --- /dev/null +++ b/celery/tests/security/case.py @@ -0,0 +1,16 @@ +from __future__ import absolute_import + +from celery.tests.case import AppCase, SkipTest + +import sys + + +class SecurityCase(AppCase): + + def setup(self): + if sys.version_info[0] == 3: + raise SkipTest('PyOpenSSL does not work on Python 3') + try: + from OpenSSL import crypto # noqa + except ImportError: + raise SkipTest('OpenSSL.crypto not installed') diff --git a/celery/tests/security/test_certificate.py b/celery/tests/security/test_certificate.py new file mode 100644 index 0000000..e1d38a9 --- /dev/null +++ b/celery/tests/security/test_certificate.py @@ -0,0 +1,77 @@ +from __future__ import absolute_import + +from celery.exceptions import SecurityError +from celery.security.certificate import Certificate, CertStore, FSCertStore + +from . import CERT1, CERT2, KEY1 +from .case import SecurityCase + +from celery.tests.case import Mock, mock_open, patch + + +class test_Certificate(SecurityCase): + + def test_valid_certificate(self): + Certificate(CERT1) + Certificate(CERT2) + + def test_invalid_certificate(self): + self.assertRaises((SecurityError, TypeError), Certificate, None) + self.assertRaises(SecurityError, Certificate, '') + self.assertRaises(SecurityError, Certificate, 'foo') + self.assertRaises(SecurityError, Certificate, CERT1[:20] + CERT1[21:]) + self.assertRaises(SecurityError, Certificate, KEY1) + + def test_has_expired(self): + self.assertTrue(Certificate(CERT1).has_expired()) + + +class test_CertStore(SecurityCase): + + def test_itercerts(self): + cert1 = Certificate(CERT1) + cert2 = Certificate(CERT2) + certstore = CertStore() + for c in certstore.itercerts(): + self.assertTrue(False) + certstore.add_cert(cert1) + certstore.add_cert(cert2) + for c in certstore.itercerts(): + self.assertIn(c, (cert1, cert2)) + + def test_duplicate(self): + cert1 = Certificate(CERT1) + certstore = CertStore() + certstore.add_cert(cert1) + self.assertRaises(SecurityError, certstore.add_cert, cert1) + + +class test_FSCertStore(SecurityCase): + + @patch('os.path.isdir') + @patch('glob.glob') + @patch('celery.security.certificate.Certificate') + def test_init(self, Certificate, glob, isdir): + cert = Certificate.return_value = Mock() + cert.has_expired.return_value = False + isdir.return_value = True + glob.return_value = ['foo.cert'] + with mock_open(): + cert.get_id.return_value = 1 + x = FSCertStore('/var/certs') + self.assertIn(1, x._certs) + glob.assert_called_with('/var/certs/*') + + # they both end up with the same id + glob.return_value = ['foo.cert', 'bar.cert'] + with self.assertRaises(SecurityError): + x = FSCertStore('/var/certs') + glob.return_value = ['foo.cert'] + + cert.has_expired.return_value = True + with self.assertRaises(SecurityError): + x = FSCertStore('/var/certs') + + isdir.return_value = False + with self.assertRaises(SecurityError): + x = FSCertStore('/var/certs') diff --git a/celery/tests/security/test_key.py 
b/celery/tests/security/test_key.py new file mode 100644 index 0000000..d8551b2 --- /dev/null +++ b/celery/tests/security/test_key.py @@ -0,0 +1,26 @@ +from __future__ import absolute_import + +from celery.exceptions import SecurityError +from celery.security.key import PrivateKey + +from . import CERT1, KEY1, KEY2 +from .case import SecurityCase + + +class test_PrivateKey(SecurityCase): + + def test_valid_private_key(self): + PrivateKey(KEY1) + PrivateKey(KEY2) + + def test_invalid_private_key(self): + self.assertRaises((SecurityError, TypeError), PrivateKey, None) + self.assertRaises(SecurityError, PrivateKey, '') + self.assertRaises(SecurityError, PrivateKey, 'foo') + self.assertRaises(SecurityError, PrivateKey, KEY1[:20] + KEY1[21:]) + self.assertRaises(SecurityError, PrivateKey, CERT1) + + def test_sign(self): + pkey = PrivateKey(KEY1) + pkey.sign('test', 'sha1') + self.assertRaises(ValueError, pkey.sign, 'test', 'unknown') diff --git a/celery/tests/security/test_security.py b/celery/tests/security/test_security.py new file mode 100644 index 0000000..227c65a --- /dev/null +++ b/celery/tests/security/test_security.py @@ -0,0 +1,110 @@ +""" +Keys and certificates for tests (KEY1 is a private key of CERT1, etc.) + +Generated with: + +.. code-block:: bash + + $ openssl genrsa -des3 -passout pass:test -out key1.key 1024 + $ openssl req -new -key key1.key -out key1.csr -passin pass:test + $ cp key1.key key1.key.org + $ openssl rsa -in key1.key.org -out key1.key -passin pass:test + $ openssl x509 -req -days 365 -in cert1.csr \ + -signkey key1.key -out cert1.crt + $ rm key1.key.org cert1.csr + +""" +from __future__ import absolute_import + +from kombu.serialization import disable_insecure_serializers + +from celery.exceptions import ImproperlyConfigured, SecurityError +from celery.five import builtins +from celery.security.utils import reraise_errors +from kombu.serialization import registry + +from .case import SecurityCase + +from celery.tests.case import Mock, mock_open, patch + + +class test_security(SecurityCase): + + def teardown(self): + registry._disabled_content_types.clear() + + def test_disable_insecure_serializers(self): + try: + disabled = registry._disabled_content_types + self.assertTrue(disabled) + + disable_insecure_serializers( + ['application/json', 'application/x-python-serialize'], + ) + self.assertIn('application/x-yaml', disabled) + self.assertNotIn('application/json', disabled) + self.assertNotIn('application/x-python-serialize', disabled) + disabled.clear() + + disable_insecure_serializers(allowed=None) + self.assertIn('application/x-yaml', disabled) + self.assertIn('application/json', disabled) + self.assertIn('application/x-python-serialize', disabled) + finally: + disable_insecure_serializers(allowed=['json']) + + def test_setup_security(self): + disabled = registry._disabled_content_types + self.assertEqual(0, len(disabled)) + + self.app.conf.CELERY_TASK_SERIALIZER = 'json' + self.app.setup_security() + self.assertIn('application/x-python-serialize', disabled) + disabled.clear() + + @patch('celery.security.register_auth') + @patch('celery.security._disable_insecure_serializers') + def test_setup_registry_complete(self, dis, reg, key='KEY', cert='CERT'): + calls = [0] + + def effect(*args): + try: + m = Mock() + m.read.return_value = 'B' if calls[0] else 'A' + return m + finally: + calls[0] += 1 + + self.app.conf.CELERY_TASK_SERIALIZER = 'auth' + with mock_open(side_effect=effect): + with patch('celery.security.registry') as registry: + store = Mock() + 
self.app.setup_security(['json'], key, cert, store) + dis.assert_called_with(['json']) + reg.assert_called_with('A', 'B', store, 'sha1', 'json') + registry._set_default_serializer.assert_called_with('auth') + + def test_security_conf(self): + self.app.conf.CELERY_TASK_SERIALIZER = 'auth' + with self.assertRaises(ImproperlyConfigured): + self.app.setup_security() + + _import = builtins.__import__ + + def import_hook(name, *args, **kwargs): + if name == 'OpenSSL': + raise ImportError + return _import(name, *args, **kwargs) + + builtins.__import__ = import_hook + with self.assertRaises(ImproperlyConfigured): + self.app.setup_security() + builtins.__import__ = _import + + def test_reraise_errors(self): + with self.assertRaises(SecurityError): + with reraise_errors(errors=(KeyError, )): + raise KeyError('foo') + with self.assertRaises(KeyError): + with reraise_errors(errors=(ValueError, )): + raise KeyError('bar') diff --git a/celery/tests/security/test_serialization.py b/celery/tests/security/test_serialization.py new file mode 100644 index 0000000..50bc4bf --- /dev/null +++ b/celery/tests/security/test_serialization.py @@ -0,0 +1,64 @@ +from __future__ import absolute_import + +import os +import base64 + +from kombu.serialization import registry + +from celery.exceptions import SecurityError +from celery.security.serialization import SecureSerializer, register_auth +from celery.security.certificate import Certificate, CertStore +from celery.security.key import PrivateKey + +from . import CERT1, CERT2, KEY1, KEY2 +from .case import SecurityCase + + +class test_SecureSerializer(SecurityCase): + + def _get_s(self, key, cert, certs): + store = CertStore() + for c in certs: + store.add_cert(Certificate(c)) + return SecureSerializer(PrivateKey(key), Certificate(cert), store) + + def test_serialize(self): + s = self._get_s(KEY1, CERT1, [CERT1]) + self.assertEqual(s.deserialize(s.serialize('foo')), 'foo') + + def test_deserialize(self): + s = self._get_s(KEY1, CERT1, [CERT1]) + self.assertRaises(SecurityError, s.deserialize, 'bad data') + + def test_unmatched_key_cert(self): + s = self._get_s(KEY1, CERT2, [CERT1, CERT2]) + self.assertRaises(SecurityError, + s.deserialize, s.serialize('foo')) + + def test_unknown_source(self): + s1 = self._get_s(KEY1, CERT1, [CERT2]) + s2 = self._get_s(KEY1, CERT1, []) + self.assertRaises(SecurityError, + s1.deserialize, s1.serialize('foo')) + self.assertRaises(SecurityError, + s2.deserialize, s2.serialize('foo')) + + def test_self_send(self): + s1 = self._get_s(KEY1, CERT1, [CERT1]) + s2 = self._get_s(KEY1, CERT1, [CERT1]) + self.assertEqual(s2.deserialize(s1.serialize('foo')), 'foo') + + def test_separate_ends(self): + s1 = self._get_s(KEY1, CERT1, [CERT2]) + s2 = self._get_s(KEY2, CERT2, [CERT1]) + self.assertEqual(s2.deserialize(s1.serialize('foo')), 'foo') + + def test_register_auth(self): + register_auth(KEY1, CERT1, '') + self.assertIn('application/data', registry._decoders) + + def test_lots_of_sign(self): + for i in range(1000): + rdata = base64.urlsafe_b64encode(os.urandom(265)) + s = self._get_s(KEY1, CERT1, [CERT1]) + self.assertEqual(s.deserialize(s.serialize(rdata)), rdata) diff --git a/celery/tests/slow/__init__.py b/celery/tests/slow/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/tests/tasks/__init__.py b/celery/tests/tasks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/tests/tasks/test_canvas.py b/celery/tests/tasks/test_canvas.py new file mode 100644 index 0000000..2508025 --- /dev/null +++ 
b/celery/tests/tasks/test_canvas.py @@ -0,0 +1,346 @@ +from __future__ import absolute_import + +from celery.canvas import ( + Signature, + chain, + group, + chord, + signature, + xmap, + xstarmap, + chunks, + _maybe_group, + maybe_signature, +) +from celery.result import EagerResult + +from celery.tests.case import AppCase, Mock + +SIG = Signature({'task': 'TASK', + 'args': ('A1', ), + 'kwargs': {'K1': 'V1'}, + 'options': {'task_id': 'TASK_ID'}, + 'subtask_type': ''}) + + +class CanvasCase(AppCase): + + def setup(self): + + @self.app.task(shared=False) + def add(x, y): + return x + y + self.add = add + + @self.app.task(shared=False) + def mul(x, y): + return x * y + self.mul = mul + + @self.app.task(shared=False) + def div(x, y): + return x / y + self.div = div + + +class test_Signature(CanvasCase): + + def test_getitem_property_class(self): + self.assertTrue(Signature.task) + self.assertTrue(Signature.args) + self.assertTrue(Signature.kwargs) + self.assertTrue(Signature.options) + self.assertTrue(Signature.subtask_type) + + def test_getitem_property(self): + self.assertEqual(SIG.task, 'TASK') + self.assertEqual(SIG.args, ('A1', )) + self.assertEqual(SIG.kwargs, {'K1': 'V1'}) + self.assertEqual(SIG.options, {'task_id': 'TASK_ID'}) + self.assertEqual(SIG.subtask_type, '') + + def test_link_on_scalar(self): + x = Signature('TASK', link=Signature('B')) + self.assertTrue(x.options['link']) + x.link(Signature('C')) + self.assertIsInstance(x.options['link'], list) + self.assertIn(Signature('B'), x.options['link']) + self.assertIn(Signature('C'), x.options['link']) + + def test_replace(self): + x = Signature('TASK', ('A'), {}) + self.assertTupleEqual(x.replace(args=('B', )).args, ('B', )) + self.assertDictEqual( + x.replace(kwargs={'FOO': 'BAR'}).kwargs, + {'FOO': 'BAR'}, + ) + self.assertDictEqual( + x.replace(options={'task_id': '123'}).options, + {'task_id': '123'}, + ) + + def test_set(self): + self.assertDictEqual( + Signature('TASK', x=1).set(task_id='2').options, + {'x': 1, 'task_id': '2'}, + ) + + def test_link(self): + x = signature(SIG) + x.link(SIG) + x.link(SIG) + self.assertIn(SIG, x.options['link']) + self.assertEqual(len(x.options['link']), 1) + + def test_link_error(self): + x = signature(SIG) + x.link_error(SIG) + x.link_error(SIG) + self.assertIn(SIG, x.options['link_error']) + self.assertEqual(len(x.options['link_error']), 1) + + def test_flatten_links(self): + tasks = [self.add.s(2, 2), self.mul.s(4), self.div.s(2)] + tasks[0].link(tasks[1]) + tasks[1].link(tasks[2]) + self.assertEqual(tasks[0].flatten_links(), tasks) + + def test_OR(self): + x = self.add.s(2, 2) | self.mul.s(4) + self.assertIsInstance(x, chain) + y = self.add.s(4, 4) | self.div.s(2) + z = x | y + self.assertIsInstance(y, chain) + self.assertIsInstance(z, chain) + self.assertEqual(len(z.tasks), 4) + with self.assertRaises(TypeError): + x | 10 + ax = self.add.s(2, 2) | (self.add.s(4) | self.add.s(8)) + self.assertIsInstance(ax, chain) + self.assertEqual(len(ax.tasks), 3, 'consolidates chain to chain') + + def test_INVERT(self): + x = self.add.s(2, 2) + x.apply_async = Mock() + x.apply_async.return_value = Mock() + x.apply_async.return_value.get = Mock() + x.apply_async.return_value.get.return_value = 4 + self.assertEqual(~x, 4) + self.assertTrue(x.apply_async.called) + + def test_merge_immutable(self): + x = self.add.si(2, 2, foo=1) + args, kwargs, options = x._merge((4, ), {'bar': 2}, {'task_id': 3}) + self.assertTupleEqual(args, (2, 2)) + self.assertDictEqual(kwargs, {'foo': 1}) + 
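# [Editor's note: illustrative sketch, not part of the imported file.] A hedged
# sketch of the canvas API these Signature tests exercise: .s() builds a
# signature, | chains signatures, .apply() runs the chain eagerly, and .si()
# builds an immutable signature whose stored arguments are kept even when more
# are passed later (as test_merge_immutable, which continues just below,
# asserts). Assumes a throwaway app and task defined only for this sketch:
from celery import Celery

app = Celery(set_as_current=False)

@app.task
def add(x, y):
    return x + y

assert (add.s(2, 2) | add.s(4)).apply().get() == 8  # (2 + 2), then (+ 4)
sig = add.si(2, 2)  # immutable: arguments supplied later are ignored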
self.assertDictEqual(options, {'task_id': 3}) + + def test_set_immutable(self): + x = self.add.s(2, 2) + self.assertFalse(x.immutable) + x.set(immutable=True) + self.assertTrue(x.immutable) + x.set(immutable=False) + self.assertFalse(x.immutable) + + def test_election(self): + x = self.add.s(2, 2) + x.freeze('foo') + x.type.app.control = Mock() + r = x.election() + self.assertTrue(x.type.app.control.election.called) + self.assertEqual(r.id, 'foo') + + def test_AsyncResult_when_not_registered(self): + s = signature('xxx.not.registered', app=self.app) + self.assertTrue(s.AsyncResult) + + def test_apply_async_when_not_registered(self): + s = signature('xxx.not.registered', app=self.app) + self.assertTrue(s._apply_async) + + +class test_xmap_xstarmap(CanvasCase): + + def test_apply(self): + for type, attr in [(xmap, 'map'), (xstarmap, 'starmap')]: + args = [(i, i) for i in range(10)] + s = getattr(self.add, attr)(args) + s.type = Mock() + + s.apply_async(foo=1) + s.type.apply_async.assert_called_with( + (), {'task': self.add.s(), 'it': args}, foo=1, + ) + + self.assertEqual(type.from_dict(dict(s)), s) + self.assertTrue(repr(s)) + + +class test_chunks(CanvasCase): + + def test_chunks(self): + x = self.add.chunks(range(100), 10) + self.assertEqual( + dict(chunks.from_dict(dict(x), app=self.app)), dict(x), + ) + + self.assertTrue(x.group()) + self.assertEqual(len(x.group().tasks), 10) + + x.group = Mock() + gr = x.group.return_value = Mock() + + x.apply_async() + gr.apply_async.assert_called_with((), {}) + + x() + gr.assert_called_with() + + self.app.conf.CELERY_ALWAYS_EAGER = True + chunks.apply_chunks(app=self.app, **x['kwargs']) + + +class test_chain(CanvasCase): + + def test_repr(self): + x = self.add.s(2, 2) | self.add.s(2) + self.assertEqual( + repr(x), '%s(2, 2) | %s(2)' % (self.add.name, self.add.name), + ) + + def test_reverse(self): + x = self.add.s(2, 2) | self.add.s(2) + self.assertIsInstance(signature(x), chain) + self.assertIsInstance(signature(dict(x)), chain) + + def test_always_eager(self): + self.app.conf.CELERY_ALWAYS_EAGER = True + self.assertEqual(~(self.add.s(4, 4) | self.add.s(8)), 16) + + def test_apply(self): + x = chain(self.add.s(4, 4), self.add.s(8), self.add.s(10)) + res = x.apply() + self.assertIsInstance(res, EagerResult) + self.assertEqual(res.get(), 26) + + self.assertEqual(res.parent.get(), 16) + self.assertEqual(res.parent.parent.get(), 8) + self.assertIsNone(res.parent.parent.parent) + + def test_empty_chain_returns_none(self): + self.assertIsNone(chain(app=self.app)()) + self.assertIsNone(chain(app=self.app).apply_async()) + + def test_call_no_tasks(self): + x = chain() + self.assertFalse(x()) + + def test_call_with_tasks(self): + x = self.add.s(2, 2) | self.add.s(4) + x.apply_async = Mock() + x(2, 2, foo=1) + x.apply_async.assert_called_with((2, 2), {'foo': 1}) + + def test_from_dict_no_args__with_args(self): + x = dict(self.add.s(2, 2) | self.add.s(4)) + x['args'] = None + self.assertIsInstance(chain.from_dict(x), chain) + x['args'] = (2, ) + self.assertIsInstance(chain.from_dict(x), chain) + + def test_accepts_generator_argument(self): + x = chain(self.add.s(i) for i in range(10)) + self.assertTrue(x.tasks[0].type, self.add) + self.assertTrue(x.type) + + +class test_group(CanvasCase): + + def test_repr(self): + x = group([self.add.s(2, 2), self.add.s(4, 4)]) + self.assertEqual(repr(x), repr(x.tasks)) + + def test_reverse(self): + x = group([self.add.s(2, 2), self.add.s(4, 4)]) + self.assertIsInstance(signature(x), group) + 
self.assertIsInstance(signature(dict(x)), group) + + def test_maybe_group_sig(self): + self.assertListEqual( + _maybe_group(self.add.s(2, 2)), [self.add.s(2, 2)], + ) + + def test_from_dict(self): + x = group([self.add.s(2, 2), self.add.s(4, 4)]) + x['args'] = (2, 2) + self.assertTrue(group.from_dict(dict(x))) + x['args'] = None + self.assertTrue(group.from_dict(dict(x))) + + def test_call_empty_group(self): + x = group(app=self.app) + self.assertFalse(len(x())) + x.delay() + x.apply_async() + x() + + def test_skew(self): + g = group([self.add.s(i, i) for i in range(10)]) + g.skew(start=1, stop=10, step=1) + for i, task in enumerate(g.tasks): + self.assertEqual(task.options['countdown'], i + 1) + + def test_iter(self): + g = group([self.add.s(i, i) for i in range(10)]) + self.assertListEqual(list(iter(g)), g.tasks) + + +class test_chord(CanvasCase): + + def test_reverse(self): + x = chord([self.add.s(2, 2), self.add.s(4, 4)], body=self.mul.s(4)) + self.assertIsInstance(signature(x), chord) + self.assertIsInstance(signature(dict(x)), chord) + + def test_clone_clones_body(self): + x = chord([self.add.s(2, 2), self.add.s(4, 4)], body=self.mul.s(4)) + y = x.clone() + self.assertIsNot(x.kwargs['body'], y.kwargs['body']) + y.kwargs.pop('body') + z = y.clone() + self.assertIsNone(z.kwargs.get('body')) + + def test_links_to_body(self): + x = chord([self.add.s(2, 2), self.add.s(4, 4)], body=self.mul.s(4)) + x.link(self.div.s(2)) + self.assertFalse(x.options.get('link')) + self.assertTrue(x.kwargs['body'].options['link']) + + x.link_error(self.div.s(2)) + self.assertFalse(x.options.get('link_error')) + self.assertTrue(x.kwargs['body'].options['link_error']) + + self.assertTrue(x.tasks) + self.assertTrue(x.body) + + def test_repr(self): + x = chord([self.add.s(2, 2), self.add.s(4, 4)], body=self.mul.s(4)) + self.assertTrue(repr(x)) + x.kwargs['body'] = None + self.assertIn('without body', repr(x)) + + +class test_maybe_signature(CanvasCase): + + def test_is_None(self): + self.assertIsNone(maybe_signature(None, app=self.app)) + + def test_is_dict(self): + self.assertIsInstance( + maybe_signature(dict(self.add.s()), app=self.app), Signature, + ) + + def test_when_sig(self): + s = self.add.s() + self.assertIs(maybe_signature(s, app=self.app), s) diff --git a/celery/tests/tasks/test_chord.py b/celery/tests/tasks/test_chord.py new file mode 100644 index 0000000..47e7718 --- /dev/null +++ b/celery/tests/tasks/test_chord.py @@ -0,0 +1,233 @@ +from __future__ import absolute_import + +from contextlib import contextmanager + +from celery import group +from celery import canvas +from celery import result +from celery.exceptions import ChordError, Retry +from celery.five import range +from celery.result import AsyncResult, GroupResult, EagerResult +from celery.tests.case import AppCase, Mock + +passthru = lambda x: x + + +class ChordCase(AppCase): + + def setup(self): + + @self.app.task(shared=False) + def add(x, y): + return x + y + self.add = add + + +class TSR(GroupResult): + is_ready = True + value = None + + def ready(self): + return self.is_ready + + def join(self, propagate=True, **kwargs): + if propagate: + for value in self.value: + if isinstance(value, Exception): + raise value + return self.value + join_native = join + + def _failed_join_report(self): + for value in self.value: + if isinstance(value, Exception): + yield EagerResult('some_id', value, 'FAILURE') + + +class TSRNoReport(TSR): + + def _failed_join_report(self): + return iter([]) + + +@contextmanager +def patch_unlock_retry(app): + unlock 
= app.tasks['celery.chord_unlock'] + retry = Mock() + retry.return_value = Retry() + prev, unlock.retry = unlock.retry, retry + try: + yield unlock, retry + finally: + unlock.retry = prev + + +class test_unlock_chord_task(ChordCase): + + def test_unlock_ready(self): + + class AlwaysReady(TSR): + is_ready = True + value = [2, 4, 8, 6] + + with self._chord_context(AlwaysReady) as (cb, retry, _): + cb.type.apply_async.assert_called_with( + ([2, 4, 8, 6], ), {}, task_id=cb.id, + ) + # did not retry + self.assertFalse(retry.call_count) + + def test_callback_fails(self): + + class AlwaysReady(TSR): + is_ready = True + value = [2, 4, 8, 6] + + def setup(callback): + callback.apply_async.side_effect = IOError() + + with self._chord_context(AlwaysReady, setup) as (cb, retry, fail): + self.assertTrue(fail.called) + self.assertEqual( + fail.call_args[0][0], cb.id, + ) + self.assertIsInstance( + fail.call_args[1]['exc'], ChordError, + ) + + def test_unlock_ready_failed(self): + + class Failed(TSR): + is_ready = True + value = [2, KeyError('foo'), 8, 6] + + with self._chord_context(Failed) as (cb, retry, fail_current): + self.assertFalse(cb.type.apply_async.called) + # did not retry + self.assertFalse(retry.call_count) + self.assertTrue(fail_current.called) + self.assertEqual( + fail_current.call_args[0][0], cb.id, + ) + self.assertIsInstance( + fail_current.call_args[1]['exc'], ChordError, + ) + self.assertIn('some_id', str(fail_current.call_args[1]['exc'])) + + def test_unlock_ready_failed_no_culprit(self): + class Failed(TSRNoReport): + is_ready = True + value = [2, KeyError('foo'), 8, 6] + + with self._chord_context(Failed) as (cb, retry, fail_current): + self.assertTrue(fail_current.called) + self.assertEqual( + fail_current.call_args[0][0], cb.id, + ) + self.assertIsInstance( + fail_current.call_args[1]['exc'], ChordError, + ) + + @contextmanager + def _chord_context(self, ResultCls, setup=None, **kwargs): + @self.app.task(shared=False) + def callback(*args, **kwargs): + pass + self.app.finalize() + + pts, result.GroupResult = result.GroupResult, ResultCls + callback.apply_async = Mock() + callback_s = callback.s() + callback_s.id = 'callback_id' + fail_current = self.app.backend.fail_from_current_stack = Mock() + try: + with patch_unlock_retry(self.app) as (unlock, retry): + subtask, canvas.maybe_signature = ( + canvas.maybe_signature, passthru, + ) + if setup: + setup(callback) + try: + assert self.app.tasks['celery.chord_unlock'] is unlock + try: + unlock( + 'group_id', callback_s, + result=[ + self.app.AsyncResult(r) for r in ['1', 2, 3] + ], + GroupResult=ResultCls, **kwargs + ) + except Retry: + pass + finally: + canvas.maybe_signature = subtask + yield callback_s, retry, fail_current + finally: + result.GroupResult = pts + + def test_when_not_ready(self): + class NeverReady(TSR): + is_ready = False + + with self._chord_context(NeverReady, interval=10, max_retries=30) \ + as (cb, retry, _): + self.assertFalse(cb.type.apply_async.called) + # did retry + retry.assert_called_with(countdown=10, max_retries=30) + + def test_is_in_registry(self): + self.assertIn('celery.chord_unlock', self.app.tasks) + + +class test_chord(ChordCase): + + def test_eager(self): + from celery import chord + + @self.app.task(shared=False) + def addX(x, y): + return x + y + + @self.app.task(shared=False) + def sumX(n): + return sum(n) + + self.app.conf.CELERY_ALWAYS_EAGER = True + x = chord(addX.s(i, i) for i in range(10)) + body = sumX.s() + result = x(body) + self.assertEqual(result.get(), sum(i + i for i in 
range(10))) + + def test_apply(self): + self.app.conf.CELERY_ALWAYS_EAGER = False + from celery import chord + + m = Mock() + m.app.conf.CELERY_ALWAYS_EAGER = False + m.AsyncResult = AsyncResult + prev, chord._type = chord._type, m + try: + x = chord(self.add.s(i, i) for i in range(10)) + body = self.add.s(2) + result = x(body) + self.assertTrue(result.id) + # does not modify original subtask + with self.assertRaises(KeyError): + body.options['task_id'] + self.assertTrue(chord._type.called) + finally: + chord._type = prev + + +class test_Chord_task(ChordCase): + + def test_run(self): + self.app.backend = Mock() + self.app.backend.cleanup = Mock() + self.app.backend.cleanup.__name__ = 'cleanup' + Chord = self.app.tasks['celery.chord'] + + body = dict() + Chord(group(self.add.subtask((i, i)) for i in range(5)), body) + Chord([self.add.subtask((j, j)) for j in range(5)], body) + self.assertEqual(self.app.backend.apply_chord.call_count, 2) diff --git a/celery/tests/tasks/test_context.py b/celery/tests/tasks/test_context.py new file mode 100644 index 0000000..ecad3f8 --- /dev/null +++ b/celery/tests/tasks/test_context.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from celery.app.task import Context +from celery.tests.case import AppCase + + +# Retrieve the values of all context attributes as a +# dictionary in an implementation-agnostic manner. +def get_context_as_dict(ctx, getter=getattr): + defaults = {} + for attr_name in dir(ctx): + if attr_name.startswith('_'): + continue # Ignore pseudo-private attributes + attr = getter(ctx, attr_name) + if callable(attr): + continue # Ignore methods and other non-trivial types + defaults[attr_name] = attr + return defaults +default_context = get_context_as_dict(Context()) + + +class test_Context(AppCase): + + def test_default_context(self): + # A bit of a tautological test, since it uses the same + # initializer as the default_context constructor. 
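# [Editor's note: illustrative sketch, not part of the imported file.] Context is
# a plain attribute container for per-task execution state; the tests here poke
# it through get_context_as_dict. A minimal sketch of the behaviour being
# asserted in the test bodies that follow:
from celery.app.task import Context

ctx = Context()
ctx.update({'id': 'unique id', 'args': ['some', 1]})  # merge arbitrary fields
assert ctx.id == 'unique id' and ctx.get('args') == ['some', 1]
ctx.clear()  # drops the per-request values again, back to the class defaults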
+ defaults = dict(default_context, children=[]) + self.assertDictEqual(get_context_as_dict(Context()), defaults) + + def test_updated_context(self): + expected = dict(default_context) + changes = dict(id='unique id', args=['some', 1], wibble='wobble') + ctx = Context() + expected.update(changes) + ctx.update(changes) + self.assertDictEqual(get_context_as_dict(ctx), expected) + self.assertDictEqual(get_context_as_dict(Context()), default_context) + + def test_modified_context(self): + expected = dict(default_context) + ctx = Context() + expected['id'] = 'unique id' + expected['args'] = ['some', 1] + ctx.id = 'unique id' + ctx.args = ['some', 1] + self.assertDictEqual(get_context_as_dict(ctx), expected) + self.assertDictEqual(get_context_as_dict(Context()), default_context) + + def test_cleared_context(self): + changes = dict(id='unique id', args=['some', 1], wibble='wobble') + ctx = Context() + ctx.update(changes) + ctx.clear() + defaults = dict(default_context, children=[]) + self.assertDictEqual(get_context_as_dict(ctx), defaults) + self.assertDictEqual(get_context_as_dict(Context()), defaults) + + def test_context_get(self): + expected = dict(default_context) + changes = dict(id='unique id', args=['some', 1], wibble='wobble') + ctx = Context() + expected.update(changes) + ctx.update(changes) + ctx_dict = get_context_as_dict(ctx, getter=Context.get) + self.assertDictEqual(ctx_dict, expected) + self.assertDictEqual(get_context_as_dict(Context()), default_context) diff --git a/celery/tests/tasks/test_result.py b/celery/tests/tasks/test_result.py new file mode 100644 index 0000000..61bf09d --- /dev/null +++ b/celery/tests/tasks/test_result.py @@ -0,0 +1,728 @@ +from __future__ import absolute_import + +from contextlib import contextmanager + +from celery import states +from celery.exceptions import IncompleteStream, TimeoutError +from celery.five import range +from celery.result import ( + AsyncResult, + EagerResult, + TaskSetResult, + result_from_tuple, +) +from celery.utils import uuid +from celery.utils.serialization import pickle + +from celery.tests.case import AppCase, Mock, depends_on_current_app, patch + + +def mock_task(name, state, result): + return dict(id=uuid(), name=name, state=state, result=result) + + +def save_result(app, task): + traceback = 'Some traceback' + if task['state'] == states.SUCCESS: + app.backend.mark_as_done(task['id'], task['result']) + elif task['state'] == states.RETRY: + app.backend.mark_as_retry( + task['id'], task['result'], traceback=traceback, + ) + else: + app.backend.mark_as_failure( + task['id'], task['result'], traceback=traceback, + ) + + +def make_mock_group(app, size=10): + tasks = [mock_task('ts%d' % i, states.SUCCESS, i) for i in range(size)] + [save_result(app, task) for task in tasks] + return [app.AsyncResult(task['id']) for task in tasks] + + +class test_AsyncResult(AppCase): + + def setup(self): + self.task1 = mock_task('task1', states.SUCCESS, 'the') + self.task2 = mock_task('task2', states.SUCCESS, 'quick') + self.task3 = mock_task('task3', states.FAILURE, KeyError('brown')) + self.task4 = mock_task('task3', states.RETRY, KeyError('red')) + + for task in (self.task1, self.task2, self.task3, self.task4): + save_result(self.app, task) + + @self.app.task(shared=False) + def mytask(): + pass + self.mytask = mytask + + def test_compat_properties(self): + x = self.app.AsyncResult('1') + self.assertEqual(x.task_id, x.id) + x.task_id = '2' + self.assertEqual(x.id, '2') + + def test_children(self): + x = self.app.AsyncResult('1') + children = 
[EagerResult(str(i), i, states.SUCCESS) for i in range(3)] + x._cache = {'children': children, 'status': states.SUCCESS} + x.backend = Mock() + self.assertTrue(x.children) + self.assertEqual(len(x.children), 3) + + def test_propagates_for_parent(self): + x = self.app.AsyncResult(uuid()) + x.backend = Mock() + x.backend.get_task_meta.return_value = {} + x.parent = EagerResult(uuid(), KeyError('foo'), states.FAILURE) + with self.assertRaises(KeyError): + x.get(propagate=True) + self.assertFalse(x.backend.wait_for.called) + + x.parent = EagerResult(uuid(), 42, states.SUCCESS) + x.get(propagate=True) + self.assertTrue(x.backend.wait_for.called) + + def test_get_children(self): + tid = uuid() + x = self.app.AsyncResult(tid) + child = [self.app.AsyncResult(uuid()).as_tuple() + for i in range(10)] + x._cache = {'children': child} + self.assertTrue(x.children) + self.assertEqual(len(x.children), 10) + + x._cache = {'status': states.SUCCESS} + x.backend._cache[tid] = {'result': None} + self.assertIsNone(x.children) + + def test_build_graph_get_leaf_collect(self): + x = self.app.AsyncResult('1') + x.backend._cache['1'] = {'status': states.SUCCESS, 'result': None} + c = [EagerResult(str(i), i, states.SUCCESS) for i in range(3)] + x.iterdeps = Mock() + x.iterdeps.return_value = ( + (None, x), + (x, c[0]), + (c[0], c[1]), + (c[1], c[2]) + ) + x.backend.READY_STATES = states.READY_STATES + self.assertTrue(x.graph) + + self.assertIs(x.get_leaf(), 2) + + it = x.collect() + self.assertListEqual(list(it), [ + (x, None), + (c[0], 0), + (c[1], 1), + (c[2], 2), + ]) + + def test_iterdeps(self): + x = self.app.AsyncResult('1') + c = [EagerResult(str(i), i, states.SUCCESS) for i in range(3)] + x._cache = {'status': states.SUCCESS, 'result': None, 'children': c} + for child in c: + child.backend = Mock() + child.backend.get_children.return_value = [] + it = x.iterdeps() + self.assertListEqual(list(it), [ + (None, x), + (x, c[0]), + (x, c[1]), + (x, c[2]), + ]) + x._cache = None + x.ready = Mock() + x.ready.return_value = False + with self.assertRaises(IncompleteStream): + list(x.iterdeps()) + list(x.iterdeps(intermediate=True)) + + def test_eq_not_implemented(self): + self.assertFalse(self.app.AsyncResult('1') == object()) + + @depends_on_current_app + def test_reduce(self): + a1 = self.app.AsyncResult('uuid', task_name=self.mytask.name) + restored = pickle.loads(pickle.dumps(a1)) + self.assertEqual(restored.id, 'uuid') + self.assertEqual(restored.task_name, self.mytask.name) + + a2 = self.app.AsyncResult('uuid') + self.assertEqual(pickle.loads(pickle.dumps(a2)).id, 'uuid') + + def test_successful(self): + ok_res = self.app.AsyncResult(self.task1['id']) + nok_res = self.app.AsyncResult(self.task3['id']) + nok_res2 = self.app.AsyncResult(self.task4['id']) + + self.assertTrue(ok_res.successful()) + self.assertFalse(nok_res.successful()) + self.assertFalse(nok_res2.successful()) + + pending_res = self.app.AsyncResult(uuid()) + self.assertFalse(pending_res.successful()) + + def test_str(self): + ok_res = self.app.AsyncResult(self.task1['id']) + ok2_res = self.app.AsyncResult(self.task2['id']) + nok_res = self.app.AsyncResult(self.task3['id']) + self.assertEqual(str(ok_res), self.task1['id']) + self.assertEqual(str(ok2_res), self.task2['id']) + self.assertEqual(str(nok_res), self.task3['id']) + + pending_id = uuid() + pending_res = self.app.AsyncResult(pending_id) + self.assertEqual(str(pending_res), pending_id) + + def test_repr(self): + ok_res = self.app.AsyncResult(self.task1['id']) + ok2_res = 
self.app.AsyncResult(self.task2['id']) + nok_res = self.app.AsyncResult(self.task3['id']) + self.assertEqual(repr(ok_res), '<AsyncResult: %s>' % ( + self.task1['id'])) + self.assertEqual(repr(ok2_res), '<AsyncResult: %s>' % ( + self.task2['id'])) + self.assertEqual(repr(nok_res), '<AsyncResult: %s>' % ( + self.task3['id'])) + + pending_id = uuid() + pending_res = self.app.AsyncResult(pending_id) + self.assertEqual(repr(pending_res), '<AsyncResult: %s>' % ( + pending_id)) + + def test_hash(self): + self.assertEqual(hash(self.app.AsyncResult('x0w991')), + hash(self.app.AsyncResult('x0w991'))) + self.assertNotEqual(hash(self.app.AsyncResult('x0w991')), + hash(self.app.AsyncResult('x1w991'))) + + def test_get_traceback(self): + ok_res = self.app.AsyncResult(self.task1['id']) + nok_res = self.app.AsyncResult(self.task3['id']) + nok_res2 = self.app.AsyncResult(self.task4['id']) + self.assertFalse(ok_res.traceback) + self.assertTrue(nok_res.traceback) + self.assertTrue(nok_res2.traceback) + + pending_res = self.app.AsyncResult(uuid()) + self.assertFalse(pending_res.traceback) + + def test_get(self): + ok_res = self.app.AsyncResult(self.task1['id']) + ok2_res = self.app.AsyncResult(self.task2['id']) + nok_res = self.app.AsyncResult(self.task3['id']) + nok2_res = self.app.AsyncResult(self.task4['id']) + + self.assertEqual(ok_res.get(), 'the') + self.assertEqual(ok2_res.get(), 'quick') + with self.assertRaises(KeyError): + nok_res.get() + self.assertTrue(nok_res.get(propagate=False)) + self.assertIsInstance(nok2_res.result, KeyError) + self.assertEqual(ok_res.info, 'the') + + def test_get_timeout(self): + res = self.app.AsyncResult(self.task4['id']) # has RETRY state + with self.assertRaises(TimeoutError): + res.get(timeout=0.001) + + pending_res = self.app.AsyncResult(uuid()) + with patch('celery.result.time') as _time: + with self.assertRaises(TimeoutError): + pending_res.get(timeout=0.001, interval=0.001) + _time.sleep.assert_called_with(0.001) + + def test_get_timeout_longer(self): + res = self.app.AsyncResult(self.task4['id']) # has RETRY state + with patch('celery.result.time') as _time: + with self.assertRaises(TimeoutError): + res.get(timeout=1, interval=1) + _time.sleep.assert_called_with(1) + + def test_ready(self): + oks = (self.app.AsyncResult(self.task1['id']), + self.app.AsyncResult(self.task2['id']), + self.app.AsyncResult(self.task3['id'])) + self.assertTrue(all(result.ready() for result in oks)) + self.assertFalse(self.app.AsyncResult(self.task4['id']).ready()) + + self.assertFalse(self.app.AsyncResult(uuid()).ready()) + + +class test_ResultSet(AppCase): + + def test_resultset_repr(self): + self.assertTrue(repr(self.app.ResultSet( + [self.app.AsyncResult(t) for t in ['1', '2', '3']]))) + + def test_eq_other(self): + self.assertFalse(self.app.ResultSet([1, 3, 3]) == 1) + self.assertTrue(self.app.ResultSet([1]) == self.app.ResultSet([1])) + + def test_get(self): + x = self.app.ResultSet([self.app.AsyncResult(t) for t in [1, 2, 3]]) + b = x.results[0].backend = Mock() + b.supports_native_join = False + x.join_native = Mock() + x.join = Mock() + x.get() + self.assertTrue(x.join.called) + b.supports_native_join = True + x.get() + self.assertTrue(x.join_native.called) + + def test_get_empty(self): + x = self.app.ResultSet([]) + self.assertIsNone(x.supports_native_join) + x.join = Mock(name='join') + x.get() + self.assertTrue(x.join.called) + + def test_add(self): + x = self.app.ResultSet([1]) + x.add(2) + self.assertEqual(len(x), 2) + x.add(2) + self.assertEqual(len(x), 2) + + @contextmanager + def dummy_copy(self): + with patch('celery.result.copy') as 
copy: + + def passt(arg): + return arg + copy.side_effect = passt + + yield + + def test_iterate_respects_subpolling_interval(self): + r1 = self.app.AsyncResult(uuid()) + r2 = self.app.AsyncResult(uuid()) + backend = r1.backend = r2.backend = Mock() + backend.subpolling_interval = 10 + + ready = r1.ready = r2.ready = Mock() + + def se(*args, **kwargs): + ready.side_effect = KeyError() + return False + ready.return_value = False + ready.side_effect = se + + x = self.app.ResultSet([r1, r2]) + with self.dummy_copy(): + with patch('celery.result.time') as _time: + with self.assertPendingDeprecation(): + with self.assertRaises(KeyError): + list(x.iterate()) + _time.sleep.assert_called_with(10) + + backend.subpolling_interval = 0 + with patch('celery.result.time') as _time: + with self.assertPendingDeprecation(): + with self.assertRaises(KeyError): + ready.return_value = False + ready.side_effect = se + list(x.iterate()) + self.assertFalse(_time.sleep.called) + + def test_times_out(self): + r1 = self.app.AsyncResult(uuid) + r1.ready = Mock() + r1.ready.return_value = False + x = self.app.ResultSet([r1]) + with self.dummy_copy(): + with patch('celery.result.time'): + with self.assertPendingDeprecation(): + with self.assertRaises(TimeoutError): + list(x.iterate(timeout=1)) + + def test_add_discard(self): + x = self.app.ResultSet([]) + x.add(self.app.AsyncResult('1')) + self.assertIn(self.app.AsyncResult('1'), x.results) + x.discard(self.app.AsyncResult('1')) + x.discard(self.app.AsyncResult('1')) + x.discard('1') + self.assertNotIn(self.app.AsyncResult('1'), x.results) + + x.update([self.app.AsyncResult('2')]) + + def test_clear(self): + x = self.app.ResultSet([]) + r = x.results + x.clear() + self.assertIs(x.results, r) + + +class MockAsyncResultFailure(AsyncResult): + + @property + def result(self): + return KeyError('baz') + + @property + def state(self): + return states.FAILURE + + def get(self, propagate=True, **kwargs): + if propagate: + raise self.result + return self.result + + +class MockAsyncResultSuccess(AsyncResult): + forgotten = False + + def forget(self): + self.forgotten = True + + @property + def result(self): + return 42 + + @property + def state(self): + return states.SUCCESS + + def get(self, **kwargs): + return self.result + + +class SimpleBackend(object): + ids = [] + + def __init__(self, ids=[]): + self.ids = ids + + def get_many(self, *args, **kwargs): + return ((id, {'result': i, 'status': states.SUCCESS}) + for i, id in enumerate(self.ids)) + + +class test_TaskSetResult(AppCase): + + def setup(self): + self.size = 10 + self.ts = TaskSetResult(uuid(), make_mock_group(self.app, self.size)) + + def test_total(self): + self.assertEqual(self.ts.total, self.size) + + def test_compat_properties(self): + self.assertEqual(self.ts.taskset_id, self.ts.id) + self.ts.taskset_id = 'foo' + self.assertEqual(self.ts.taskset_id, 'foo') + + def test_compat_subtasks_kwarg(self): + x = TaskSetResult(uuid(), subtasks=[1, 2, 3]) + self.assertEqual(x.results, [1, 2, 3]) + + def test_itersubtasks(self): + it = self.ts.itersubtasks() + + for i, t in enumerate(it): + self.assertEqual(t.get(), i) + + +class test_GroupResult(AppCase): + + def setup(self): + self.size = 10 + self.ts = self.app.GroupResult( + uuid(), make_mock_group(self.app, self.size), + ) + + @depends_on_current_app + def test_is_pickleable(self): + ts = self.app.GroupResult(uuid(), [self.app.AsyncResult(uuid())]) + self.assertEqual(pickle.loads(pickle.dumps(ts)), ts) + ts2 = self.app.GroupResult(uuid(), 
[self.app.AsyncResult(uuid())]) + self.assertEqual(pickle.loads(pickle.dumps(ts2)), ts2) + + def test_len(self): + self.assertEqual(len(self.ts), self.size) + + def test_eq_other(self): + self.assertFalse(self.ts == 1) + + @depends_on_current_app + def test_reduce(self): + self.assertTrue(pickle.loads(pickle.dumps(self.ts))) + + def test_iterate_raises(self): + ar = MockAsyncResultFailure(uuid(), app=self.app) + ts = self.app.GroupResult(uuid(), [ar]) + with self.assertPendingDeprecation(): + it = ts.iterate() + with self.assertRaises(KeyError): + next(it) + + def test_forget(self): + subs = [MockAsyncResultSuccess(uuid(), app=self.app), + MockAsyncResultSuccess(uuid(), app=self.app)] + ts = self.app.GroupResult(uuid(), subs) + ts.forget() + for sub in subs: + self.assertTrue(sub.forgotten) + + def test_getitem(self): + subs = [MockAsyncResultSuccess(uuid(), app=self.app), + MockAsyncResultSuccess(uuid(), app=self.app)] + ts = self.app.GroupResult(uuid(), subs) + self.assertIs(ts[0], subs[0]) + + def test_save_restore(self): + subs = [MockAsyncResultSuccess(uuid(), app=self.app), + MockAsyncResultSuccess(uuid(), app=self.app)] + ts = self.app.GroupResult(uuid(), subs) + ts.save() + with self.assertRaises(AttributeError): + ts.save(backend=object()) + self.assertEqual(self.app.GroupResult.restore(ts.id).subtasks, + ts.subtasks) + ts.delete() + self.assertIsNone(self.app.GroupResult.restore(ts.id)) + with self.assertRaises(AttributeError): + self.app.GroupResult.restore(ts.id, backend=object()) + + def test_join_native(self): + backend = SimpleBackend() + subtasks = [self.app.AsyncResult(uuid(), backend=backend) + for i in range(10)] + ts = self.app.GroupResult(uuid(), subtasks) + ts.app.backend = backend + backend.ids = [subtask.id for subtask in subtasks] + res = ts.join_native() + self.assertEqual(res, list(range(10))) + + def test_join_native_raises(self): + ts = self.app.GroupResult(uuid(), [self.app.AsyncResult(uuid())]) + ts.iter_native = Mock() + ts.iter_native.return_value = iter([ + (uuid(), {'status': states.FAILURE, 'result': KeyError()}) + ]) + with self.assertRaises(KeyError): + ts.join_native(propagate=True) + + def test_failed_join_report(self): + res = Mock() + ts = self.app.GroupResult(uuid(), [res]) + res.state = states.FAILURE + res.backend.is_cached.return_value = True + self.assertIs(next(ts._failed_join_report()), res) + res.backend.is_cached.return_value = False + with self.assertRaises(StopIteration): + next(ts._failed_join_report()) + + def test_repr(self): + self.assertTrue(repr( + self.app.GroupResult(uuid(), [self.app.AsyncResult(uuid())]) + )) + + def test_children_is_results(self): + ts = self.app.GroupResult(uuid(), [self.app.AsyncResult(uuid())]) + self.assertIs(ts.children, ts.results) + + def test_iter_native(self): + backend = SimpleBackend() + subtasks = [self.app.AsyncResult(uuid(), backend=backend) + for i in range(10)] + ts = self.app.GroupResult(uuid(), subtasks) + ts.app.backend = backend + backend.ids = [subtask.id for subtask in subtasks] + self.assertEqual(len(list(ts.iter_native())), 10) + + def test_iterate_yields(self): + ar = MockAsyncResultSuccess(uuid(), app=self.app) + ar2 = MockAsyncResultSuccess(uuid(), app=self.app) + ts = self.app.GroupResult(uuid(), [ar, ar2]) + with self.assertPendingDeprecation(): + it = ts.iterate() + self.assertEqual(next(it), 42) + self.assertEqual(next(it), 42) + + def test_iterate_eager(self): + ar1 = EagerResult(uuid(), 42, states.SUCCESS) + ar2 = EagerResult(uuid(), 42, states.SUCCESS) + ts = 
self.app.GroupResult(uuid(), [ar1, ar2]) + with self.assertPendingDeprecation(): + it = ts.iterate() + self.assertEqual(next(it), 42) + self.assertEqual(next(it), 42) + + def test_join_timeout(self): + ar = MockAsyncResultSuccess(uuid(), app=self.app) + ar2 = MockAsyncResultSuccess(uuid(), app=self.app) + ar3 = self.app.AsyncResult(uuid()) + ts = self.app.GroupResult(uuid(), [ar, ar2, ar3]) + with self.assertRaises(TimeoutError): + ts.join(timeout=0.0000001) + + ar4 = self.app.AsyncResult(uuid()) + ar4.get = Mock() + ts2 = self.app.GroupResult(uuid(), [ar4]) + self.assertTrue(ts2.join(timeout=0.1)) + + def test_iter_native_when_empty_group(self): + ts = self.app.GroupResult(uuid(), []) + self.assertListEqual(list(ts.iter_native()), []) + + def test_iterate_simple(self): + with self.assertPendingDeprecation(): + it = self.ts.iterate() + results = sorted(list(it)) + self.assertListEqual(results, list(range(self.size))) + + def test___iter__(self): + self.assertListEqual(list(iter(self.ts)), self.ts.results) + + def test_join(self): + joined = self.ts.join() + self.assertListEqual(joined, list(range(self.size))) + + def test_successful(self): + self.assertTrue(self.ts.successful()) + + def test_failed(self): + self.assertFalse(self.ts.failed()) + + def test_waiting(self): + self.assertFalse(self.ts.waiting()) + + def test_ready(self): + self.assertTrue(self.ts.ready()) + + def test_completed_count(self): + self.assertEqual(self.ts.completed_count(), len(self.ts)) + + +class test_pending_AsyncResult(AppCase): + + def setup(self): + self.task = self.app.AsyncResult(uuid()) + + def test_result(self): + self.assertIsNone(self.task.result) + + +class test_failed_AsyncResult(test_GroupResult): + + def setup(self): + self.size = 11 + subtasks = make_mock_group(self.app, 10) + failed = mock_task('ts11', states.FAILURE, KeyError('Baz')) + save_result(self.app, failed) + failed_res = self.app.AsyncResult(failed['id']) + self.ts = self.app.GroupResult(uuid(), subtasks + [failed_res]) + + def test_completed_count(self): + self.assertEqual(self.ts.completed_count(), len(self.ts) - 1) + + def test_iterate_simple(self): + with self.assertPendingDeprecation(): + it = self.ts.iterate() + + def consume(): + return list(it) + + with self.assertRaises(KeyError): + consume() + + def test_join(self): + with self.assertRaises(KeyError): + self.ts.join() + + def test_successful(self): + self.assertFalse(self.ts.successful()) + + def test_failed(self): + self.assertTrue(self.ts.failed()) + + +class test_pending_Group(AppCase): + + def setup(self): + self.ts = self.app.GroupResult( + uuid(), [self.app.AsyncResult(uuid()), + self.app.AsyncResult(uuid())]) + + def test_completed_count(self): + self.assertEqual(self.ts.completed_count(), 0) + + def test_ready(self): + self.assertFalse(self.ts.ready()) + + def test_waiting(self): + self.assertTrue(self.ts.waiting()) + + def x_join(self): + with self.assertRaises(TimeoutError): + self.ts.join(timeout=0.001) + + def x_join_longer(self): + with self.assertRaises(TimeoutError): + self.ts.join(timeout=1) + + +class test_EagerResult(AppCase): + + def setup(self): + + @self.app.task(shared=False) + def raising(x, y): + raise KeyError(x, y) + self.raising = raising + + def test_wait_raises(self): + res = self.raising.apply(args=[3, 3]) + with self.assertRaises(KeyError): + res.wait() + self.assertTrue(res.wait(propagate=False)) + + def test_wait(self): + res = EagerResult('x', 'x', states.RETRY) + res.wait() + self.assertEqual(res.state, states.RETRY) + 
self.assertEqual(res.status, states.RETRY) + + def test_forget(self): + res = EagerResult('x', 'x', states.RETRY) + res.forget() + + def test_revoke(self): + res = self.raising.apply(args=[3, 3]) + self.assertFalse(res.revoke()) + + +class test_tuples(AppCase): + + def test_AsyncResult(self): + x = self.app.AsyncResult(uuid()) + self.assertEqual(x, result_from_tuple(x.as_tuple(), self.app)) + self.assertEqual(x, result_from_tuple(x, self.app)) + + def test_with_parent(self): + x = self.app.AsyncResult(uuid()) + x.parent = self.app.AsyncResult(uuid()) + y = result_from_tuple(x.as_tuple(), self.app) + self.assertEqual(y, x) + self.assertEqual(y.parent, x.parent) + self.assertIsInstance(y.parent, AsyncResult) + + def test_compat(self): + uid = uuid() + x = result_from_tuple([uid, []], app=self.app) + self.assertEqual(x.id, uid) + + def test_GroupResult(self): + x = self.app.GroupResult( + uuid(), [self.app.AsyncResult(uuid()) for _ in range(10)], + ) + self.assertEqual(x, result_from_tuple(x.as_tuple(), self.app)) + self.assertEqual(x, result_from_tuple(x, self.app)) diff --git a/celery/tests/tasks/test_states.py b/celery/tests/tasks/test_states.py new file mode 100644 index 0000000..b30a4ee --- /dev/null +++ b/celery/tests/tasks/test_states.py @@ -0,0 +1,31 @@ +from __future__ import absolute_import + +from celery.states import state +from celery import states +from celery.tests.case import Case + + +class test_state_precedence(Case): + + def test_gt(self): + self.assertGreater(state(states.SUCCESS), + state(states.PENDING)) + self.assertGreater(state(states.FAILURE), + state(states.RECEIVED)) + self.assertGreater(state(states.REVOKED), + state(states.STARTED)) + self.assertGreater(state(states.SUCCESS), + state('CRASHED')) + self.assertGreater(state(states.FAILURE), + state('CRASHED')) + self.assertFalse(state(states.REVOKED) > state('CRASHED')) + + def test_lt(self): + self.assertLess(state(states.PENDING), state(states.SUCCESS)) + self.assertLess(state(states.RECEIVED), state(states.FAILURE)) + self.assertLess(state(states.STARTED), state(states.REVOKED)) + self.assertLess(state('CRASHED'), state(states.SUCCESS)) + self.assertLess(state('CRASHED'), state(states.FAILURE)) + self.assertTrue(state(states.REVOKED) < state('CRASHED')) + self.assertTrue(state(states.REVOKED) <= state('CRASHED')) + self.assertTrue(state('CRASHED') >= state(states.REVOKED)) diff --git a/celery/tests/tasks/test_tasks.py b/celery/tests/tasks/test_tasks.py new file mode 100644 index 0000000..4feae0b --- /dev/null +++ b/celery/tests/tasks/test_tasks.py @@ -0,0 +1,464 @@ +from __future__ import absolute_import + +from datetime import datetime, timedelta + +from kombu import Queue + +from celery import Task + +from celery.exceptions import Retry +from celery.five import items, range, string_t +from celery.result import EagerResult +from celery.utils import uuid +from celery.utils.timeutils import parse_iso8601 + +from celery.tests.case import AppCase, depends_on_current_app, patch + + +def return_True(*args, **kwargs): + # Task run functions can't be closures/lambdas, as they're pickled. 
+ return True + + +def raise_exception(self, **kwargs): + raise Exception('%s error' % self.__class__) + + +class MockApplyTask(Task): + abstract = True + applied = 0 + + def run(self, x, y): + return x * y + + def apply_async(self, *args, **kwargs): + self.applied += 1 + + +class TasksCase(AppCase): + + def setup(self): + self.mytask = self.app.task(shared=False)(return_True) + + @self.app.task(bind=True, count=0, shared=False) + def increment_counter(self, increment_by=1): + self.count += increment_by or 1 + return self.count + self.increment_counter = increment_counter + + @self.app.task(shared=False) + def raising(): + raise KeyError('foo') + self.raising = raising + + @self.app.task(bind=True, max_retries=3, iterations=0, shared=False) + def retry_task(self, arg1, arg2, kwarg=1, max_retries=None, care=True): + self.iterations += 1 + rmax = self.max_retries if max_retries is None else max_retries + + assert repr(self.request) + retries = self.request.retries + if care and retries >= rmax: + return arg1 + else: + raise self.retry(countdown=0, max_retries=rmax) + self.retry_task = retry_task + + @self.app.task(bind=True, max_retries=3, iterations=0, shared=False) + def retry_task_noargs(self, **kwargs): + self.iterations += 1 + + if self.request.retries >= 3: + return 42 + else: + raise self.retry(countdown=0) + self.retry_task_noargs = retry_task_noargs + + @self.app.task(bind=True, max_retries=3, iterations=0, + base=MockApplyTask, shared=False) + def retry_task_mockapply(self, arg1, arg2, kwarg=1): + self.iterations += 1 + + retries = self.request.retries + if retries >= 3: + return arg1 + raise self.retry(countdown=0) + self.retry_task_mockapply = retry_task_mockapply + + @self.app.task(bind=True, max_retries=3, iterations=0, shared=False) + def retry_task_customexc(self, arg1, arg2, kwarg=1, **kwargs): + self.iterations += 1 + + retries = self.request.retries + if retries >= 3: + return arg1 + kwarg + else: + try: + raise MyCustomException('Elaine Marie Benes') + except MyCustomException as exc: + kwargs.update(kwarg=kwarg) + raise self.retry(countdown=0, exc=exc) + self.retry_task_customexc = retry_task_customexc + + +class MyCustomException(Exception): + """Random custom exception.""" + + +class test_task_retries(TasksCase): + + def test_retry(self): + self.retry_task.max_retries = 3 + self.retry_task.iterations = 0 + self.retry_task.apply([0xFF, 0xFFFF]) + self.assertEqual(self.retry_task.iterations, 4) + + self.retry_task.max_retries = 3 + self.retry_task.iterations = 0 + self.retry_task.apply([0xFF, 0xFFFF], {'max_retries': 10}) + self.assertEqual(self.retry_task.iterations, 11) + + def test_retry_no_args(self): + self.retry_task_noargs.max_retries = 3 + self.retry_task_noargs.iterations = 0 + self.retry_task_noargs.apply(propagate=True).get() + self.assertEqual(self.retry_task_noargs.iterations, 4) + + def test_retry_kwargs_can_be_empty(self): + self.retry_task_mockapply.push_request() + try: + with self.assertRaises(Retry): + import sys + try: + sys.exc_clear() + except AttributeError: + pass + self.retry_task_mockapply.retry(args=[4, 4], kwargs=None) + finally: + self.retry_task_mockapply.pop_request() + + def test_retry_not_eager(self): + self.retry_task_mockapply.push_request() + try: + self.retry_task_mockapply.request.called_directly = False + exc = Exception('baz') + try: + self.retry_task_mockapply.retry( + args=[4, 4], kwargs={'task_retries': 0}, + exc=exc, throw=False, + ) + self.assertTrue(self.retry_task_mockapply.applied) + finally: + 
self.retry_task_mockapply.applied = 0 + + try: + with self.assertRaises(Retry): + self.retry_task_mockapply.retry( + args=[4, 4], kwargs={'task_retries': 0}, + exc=exc, throw=True) + self.assertTrue(self.retry_task_mockapply.applied) + finally: + self.retry_task_mockapply.applied = 0 + finally: + self.retry_task_mockapply.pop_request() + + def test_retry_with_kwargs(self): + self.retry_task_customexc.max_retries = 3 + self.retry_task_customexc.iterations = 0 + self.retry_task_customexc.apply([0xFF, 0xFFFF], {'kwarg': 0xF}) + self.assertEqual(self.retry_task_customexc.iterations, 4) + + def test_retry_with_custom_exception(self): + self.retry_task_customexc.max_retries = 2 + self.retry_task_customexc.iterations = 0 + result = self.retry_task_customexc.apply( + [0xFF, 0xFFFF], {'kwarg': 0xF}, + ) + with self.assertRaises(MyCustomException): + result.get() + self.assertEqual(self.retry_task_customexc.iterations, 3) + + def test_max_retries_exceeded(self): + self.retry_task.max_retries = 2 + self.retry_task.iterations = 0 + result = self.retry_task.apply([0xFF, 0xFFFF], {'care': False}) + with self.assertRaises(self.retry_task.MaxRetriesExceededError): + result.get() + self.assertEqual(self.retry_task.iterations, 3) + + self.retry_task.max_retries = 1 + self.retry_task.iterations = 0 + result = self.retry_task.apply([0xFF, 0xFFFF], {'care': False}) + with self.assertRaises(self.retry_task.MaxRetriesExceededError): + result.get() + self.assertEqual(self.retry_task.iterations, 2) + + +class test_canvas_utils(TasksCase): + + def test_si(self): + self.assertTrue(self.retry_task.si()) + self.assertTrue(self.retry_task.si().immutable) + + def test_chunks(self): + self.assertTrue(self.retry_task.chunks(range(100), 10)) + + def test_map(self): + self.assertTrue(self.retry_task.map(range(100))) + + def test_starmap(self): + self.assertTrue(self.retry_task.starmap(range(100))) + + def test_on_success(self): + self.retry_task.on_success(1, 1, (), {}) + + +class test_tasks(TasksCase): + + def now(self): + return self.app.now() + + @depends_on_current_app + def test_unpickle_task(self): + import pickle + + @self.app.task(shared=True) + def xxx(): + pass + self.assertIs(pickle.loads(pickle.dumps(xxx)), xxx.app.tasks[xxx.name]) + + def test_AsyncResult(self): + task_id = uuid() + result = self.retry_task.AsyncResult(task_id) + self.assertEqual(result.backend, self.retry_task.backend) + self.assertEqual(result.id, task_id) + + def assertNextTaskDataEqual(self, consumer, presult, task_name, + test_eta=False, test_expires=False, **kwargs): + next_task = consumer.queues[0].get(accept=['pickle']) + task_data = next_task.decode() + self.assertEqual(task_data['id'], presult.id) + self.assertEqual(task_data['task'], task_name) + task_kwargs = task_data.get('kwargs', {}) + if test_eta: + self.assertIsInstance(task_data.get('eta'), string_t) + to_datetime = parse_iso8601(task_data.get('eta')) + self.assertIsInstance(to_datetime, datetime) + if test_expires: + self.assertIsInstance(task_data.get('expires'), string_t) + to_datetime = parse_iso8601(task_data.get('expires')) + self.assertIsInstance(to_datetime, datetime) + for arg_name, arg_value in items(kwargs): + self.assertEqual(task_kwargs.get(arg_name), arg_value) + + def test_incomplete_task_cls(self): + + class IncompleteTask(Task): + app = self.app + name = 'c.unittest.t.itask' + + with self.assertRaises(NotImplementedError): + IncompleteTask().run() + + def test_task_kwargs_must_be_dictionary(self): + with self.assertRaises(ValueError): + 
self.increment_counter.apply_async([], 'str') + + def test_task_args_must_be_list(self): + with self.assertRaises(ValueError): + self.increment_counter.apply_async('str', {}) + + def test_regular_task(self): + self.assertIsInstance(self.mytask, Task) + self.assertTrue(self.mytask.run()) + self.assertTrue( + callable(self.mytask), 'Task class is callable()', + ) + self.assertTrue(self.mytask(), 'Task class runs run() when called') + + with self.app.connection_or_acquire() as conn: + consumer = self.app.amqp.TaskConsumer(conn) + with self.assertRaises(NotImplementedError): + consumer.receive('foo', 'foo') + consumer.purge() + self.assertIsNone(consumer.queues[0].get()) + self.app.amqp.TaskConsumer(conn, queues=[Queue('foo')]) + + # Without arguments. + presult = self.mytask.delay() + self.assertNextTaskDataEqual(consumer, presult, self.mytask.name) + + # With arguments. + presult2 = self.mytask.apply_async( + kwargs=dict(name='George Costanza'), + ) + self.assertNextTaskDataEqual( + consumer, presult2, self.mytask.name, name='George Costanza', + ) + + # send_task + sresult = self.app.send_task(self.mytask.name, + kwargs=dict(name='Elaine M. Benes')) + self.assertNextTaskDataEqual( + consumer, sresult, self.mytask.name, name='Elaine M. Benes', + ) + + # With eta. + presult2 = self.mytask.apply_async( + kwargs=dict(name='George Costanza'), + eta=self.now() + timedelta(days=1), + expires=self.now() + timedelta(days=2), + ) + self.assertNextTaskDataEqual( + consumer, presult2, self.mytask.name, + name='George Costanza', test_eta=True, test_expires=True, + ) + + # With countdown. + presult2 = self.mytask.apply_async( + kwargs=dict(name='George Costanza'), countdown=10, expires=12, + ) + self.assertNextTaskDataEqual( + consumer, presult2, self.mytask.name, + name='George Costanza', test_eta=True, test_expires=True, + ) + + # Discarding all tasks. 
+ consumer.purge() + self.mytask.apply_async() + self.assertEqual(consumer.purge(), 1) + self.assertIsNone(consumer.queues[0].get()) + + self.assertFalse(presult.successful()) + self.mytask.backend.mark_as_done(presult.id, result=None) + self.assertTrue(presult.successful()) + + def test_repr_v2_compat(self): + self.mytask.__v2_compat__ = True + self.assertIn('v2 compatible', repr(self.mytask)) + + def test_apply_with_self(self): + + @self.app.task(__self__=42, shared=False) + def tawself(self): + return self + + self.assertEqual(tawself.apply().get(), 42) + + self.assertEqual(tawself(), 42) + + def test_context_get(self): + self.mytask.push_request() + try: + request = self.mytask.request + request.foo = 32 + self.assertEqual(request.get('foo'), 32) + self.assertEqual(request.get('bar', 36), 36) + request.clear() + finally: + self.mytask.pop_request() + + def test_task_class_repr(self): + self.assertIn('class Task of', repr(self.mytask.app.Task)) + self.mytask.app.Task._app = None + self.assertIn('unbound', repr(self.mytask.app.Task, )) + + def test_bind_no_magic_kwargs(self): + self.mytask.accept_magic_kwargs = None + self.mytask.bind(self.mytask.app) + + def test_annotate(self): + with patch('celery.app.task.resolve_all_annotations') as anno: + anno.return_value = [{'FOO': 'BAR'}] + + @self.app.task(shared=False) + def task(): + pass + task.annotate() + self.assertEqual(task.FOO, 'BAR') + + def test_after_return(self): + self.mytask.push_request() + try: + self.mytask.request.chord = self.mytask.s() + self.mytask.after_return('SUCCESS', 1.0, 'foobar', (), {}, None) + self.mytask.request.clear() + finally: + self.mytask.pop_request() + + def test_send_task_sent_event(self): + with self.app.connection() as conn: + self.app.conf.CELERY_SEND_TASK_SENT_EVENT = True + self.assertTrue(self.app.amqp.TaskProducer(conn).send_sent_event) + + def test_update_state(self): + + @self.app.task(shared=False) + def yyy(): + pass + + yyy.push_request() + try: + tid = uuid() + yyy.update_state(tid, 'FROBULATING', {'fooz': 'baaz'}) + self.assertEqual(yyy.AsyncResult(tid).status, 'FROBULATING') + self.assertDictEqual(yyy.AsyncResult(tid).result, {'fooz': 'baaz'}) + + yyy.request.id = tid + yyy.update_state(state='FROBUZATING', meta={'fooz': 'baaz'}) + self.assertEqual(yyy.AsyncResult(tid).status, 'FROBUZATING') + self.assertDictEqual(yyy.AsyncResult(tid).result, {'fooz': 'baaz'}) + finally: + yyy.pop_request() + + def test_repr(self): + + @self.app.task(shared=False) + def task_test_repr(): + pass + + self.assertIn('task_test_repr', repr(task_test_repr)) + + def test_has___name__(self): + + @self.app.task(shared=False) + def yyy2(): + pass + + self.assertTrue(yyy2.__name__) + + +class test_apply_task(TasksCase): + + def test_apply_throw(self): + with self.assertRaises(KeyError): + self.raising.apply(throw=True) + + def test_apply_with_CELERY_EAGER_PROPAGATES_EXCEPTIONS(self): + self.app.conf.CELERY_EAGER_PROPAGATES_EXCEPTIONS = True + with self.assertRaises(KeyError): + self.raising.apply() + + def test_apply(self): + self.increment_counter.count = 0 + + e = self.increment_counter.apply() + self.assertIsInstance(e, EagerResult) + self.assertEqual(e.get(), 1) + + e = self.increment_counter.apply(args=[1]) + self.assertEqual(e.get(), 2) + + e = self.increment_counter.apply(kwargs={'increment_by': 4}) + self.assertEqual(e.get(), 6) + + self.assertTrue(e.successful()) + self.assertTrue(e.ready()) + self.assertTrue(repr(e).startswith('> 2, Proxy(lambda: 2)) + self.assertEqual(Proxy(lambda: 10) ^ 7, Proxy(lambda: 
13)) + self.assertEqual(Proxy(lambda: 10) | 40, Proxy(lambda: 42)) + self.assertEqual(~Proxy(lambda: 10), Proxy(lambda: -11)) + self.assertEqual(-Proxy(lambda: 10), Proxy(lambda: -10)) + self.assertEqual(+Proxy(lambda: -10), Proxy(lambda: -10)) + self.assertTrue(Proxy(lambda: 10) < Proxy(lambda: 20)) + self.assertTrue(Proxy(lambda: 20) > Proxy(lambda: 10)) + self.assertTrue(Proxy(lambda: 10) >= Proxy(lambda: 10)) + self.assertTrue(Proxy(lambda: 10) <= Proxy(lambda: 10)) + self.assertTrue(Proxy(lambda: 10) == Proxy(lambda: 10)) + self.assertTrue(Proxy(lambda: 20) != Proxy(lambda: 10)) + self.assertTrue(Proxy(lambda: 100).__divmod__(30)) + self.assertTrue(Proxy(lambda: 100).__truediv__(30)) + self.assertTrue(abs(Proxy(lambda: -100))) + + x = Proxy(lambda: 10) + x -= 1 + self.assertEqual(x, 9) + x = Proxy(lambda: 9) + x += 1 + self.assertEqual(x, 10) + x = Proxy(lambda: 10) + x *= 2 + self.assertEqual(x, 20) + x = Proxy(lambda: 20) + x /= 2 + self.assertEqual(x, 10) + x = Proxy(lambda: 10) + x %= 2 + self.assertEqual(x, 0) + x = Proxy(lambda: 10) + x <<= 3 + self.assertEqual(x, 80) + x = Proxy(lambda: 80) + x >>= 4 + self.assertEqual(x, 5) + x = Proxy(lambda: 5) + x ^= 1 + self.assertEqual(x, 4) + x = Proxy(lambda: 4) + x **= 4 + self.assertEqual(x, 256) + x = Proxy(lambda: 256) + x //= 2 + self.assertEqual(x, 128) + x = Proxy(lambda: 128) + x |= 2 + self.assertEqual(x, 130) + x = Proxy(lambda: 130) + x &= 10 + self.assertEqual(x, 2) + + x = Proxy(lambda: 10) + self.assertEqual(type(x.__float__()), float) + self.assertEqual(type(x.__int__()), int) + if not PY3: + self.assertEqual(type(x.__long__()), long_t) + self.assertTrue(hex(x)) + self.assertTrue(oct(x)) + + def test_hash(self): + + class X(object): + + def __hash__(self): + return 1234 + + self.assertEqual(hash(Proxy(lambda: X())), 1234) + + def test_call(self): + + class X(object): + + def __call__(self): + return 1234 + + self.assertEqual(Proxy(lambda: X())(), 1234) + + def test_context(self): + + class X(object): + entered = exited = False + + def __enter__(self): + self.entered = True + return 1234 + + def __exit__(self, *exc_info): + self.exited = True + + v = X() + x = Proxy(lambda: v) + with x as val: + self.assertEqual(val, 1234) + self.assertTrue(x.entered) + self.assertTrue(x.exited) + + def test_reduce(self): + + class X(object): + + def __reduce__(self): + return 123 + + x = Proxy(lambda: X()) + self.assertEqual(x.__reduce__(), 123) + + +class test_PromiseProxy(Case): + + def test_only_evaluated_once(self): + + class X(object): + attr = 123 + evals = 0 + + def __init__(self): + self.__class__.evals += 1 + + p = PromiseProxy(X) + self.assertEqual(p.attr, 123) + self.assertEqual(p.attr, 123) + self.assertEqual(X.evals, 1) + + def test_callbacks(self): + source = Mock(name='source') + p = PromiseProxy(source) + cbA = Mock(name='cbA') + cbB = Mock(name='cbB') + cbC = Mock(name='cbC') + p.__then__(cbA, p) + p.__then__(cbB, p) + self.assertFalse(p.__evaluated__()) + self.assertTrue(object.__getattribute__(p, '__pending__')) + + self.assertTrue(repr(p)) + self.assertTrue(p.__evaluated__()) + with self.assertRaises(AttributeError): + object.__getattribute__(p, '__pending__') + cbA.assert_called_with(p) + cbB.assert_called_with(p) + + self.assertTrue(p.__evaluated__()) + p.__then__(cbC, p) + cbC.assert_called_with(p) + + with self.assertRaises(AttributeError): + object.__getattribute__(p, '__pending__') + + def test_maybe_evaluate(self): + x = PromiseProxy(lambda: 30) + self.assertFalse(x.__evaluated__()) + 
self.assertEqual(maybe_evaluate(x), 30) + self.assertEqual(maybe_evaluate(x), 30) + + self.assertEqual(maybe_evaluate(30), 30) + self.assertTrue(x.__evaluated__()) diff --git a/celery/tests/utils/test_mail.py b/celery/tests/utils/test_mail.py new file mode 100644 index 0000000..4006fb0 --- /dev/null +++ b/celery/tests/utils/test_mail.py @@ -0,0 +1,53 @@ +from __future__ import absolute_import + +from celery.utils.mail import Message, Mailer, SSLError + +from celery.tests.case import Case, Mock, patch + + +msg = Message(to='george@vandelay.com', sender='elaine@pendant.com', + subject="What's up with Jerry?", body='???!') + + +class test_Message(Case): + + def test_repr(self): + self.assertTrue(repr(msg)) + + def test_str(self): + self.assertTrue(str(msg)) + + +class test_Mailer(Case): + + def test_send_wrapper(self): + mailer = Mailer() + mailer._send = Mock() + mailer.send(msg) + mailer._send.assert_called_with(msg) + + @patch('smtplib.SMTP_SSL', create=True) + def test_send_ssl_tls(self, SMTP_SSL): + mailer = Mailer(use_ssl=True, use_tls=True) + client = SMTP_SSL.return_value = Mock() + mailer._send(msg) + self.assertTrue(client.starttls.called) + self.assertEqual(client.ehlo.call_count, 2) + client.quit.assert_called_with() + client.sendmail.assert_called_with(msg.sender, msg.to, str(msg)) + mailer = Mailer(use_ssl=True, use_tls=True, user='foo', + password='bar') + mailer._send(msg) + client.login.assert_called_with('foo', 'bar') + + @patch('smtplib.SMTP') + def test_send(self, SMTP): + client = SMTP.return_value = Mock() + mailer = Mailer(use_ssl=False, use_tls=False) + mailer._send(msg) + + client.sendmail.assert_called_with(msg.sender, msg.to, str(msg)) + + client.quit.side_effect = SSLError() + mailer._send(msg) + client.close.assert_called_with() diff --git a/celery/tests/utils/test_pickle.py b/celery/tests/utils/test_pickle.py new file mode 100644 index 0000000..6b65bb3 --- /dev/null +++ b/celery/tests/utils/test_pickle.py @@ -0,0 +1,51 @@ +from __future__ import absolute_import + +from celery.utils.serialization import pickle +from celery.tests.case import Case + + +class RegularException(Exception): + pass + + +class ArgOverrideException(Exception): + + def __init__(self, message, status_code=10): + self.status_code = status_code + Exception.__init__(self, message, status_code) + + +class test_Pickle(Case): + + def test_pickle_regular_exception(self): + exc = None + try: + raise RegularException('RegularException raised') + except RegularException as exc_: + exc = exc_ + + pickled = pickle.dumps({'exception': exc}) + unpickled = pickle.loads(pickled) + exception = unpickled.get('exception') + self.assertTrue(exception) + self.assertIsInstance(exception, RegularException) + self.assertTupleEqual(exception.args, ('RegularException raised', )) + + def test_pickle_arg_override_exception(self): + + exc = None + try: + raise ArgOverrideException( + 'ArgOverrideException raised', status_code=100, + ) + except ArgOverrideException as exc_: + exc = exc_ + + pickled = pickle.dumps({'exception': exc}) + unpickled = pickle.loads(pickled) + exception = unpickled.get('exception') + self.assertTrue(exception) + self.assertIsInstance(exception, ArgOverrideException) + self.assertTupleEqual(exception.args, ( + 'ArgOverrideException raised', 100)) + self.assertEqual(exception.status_code, 100) diff --git a/celery/tests/utils/test_platforms.py b/celery/tests/utils/test_platforms.py new file mode 100644 index 0000000..aae0b38 --- /dev/null +++ b/celery/tests/utils/test_platforms.py @@ -0,0 +1,701
@@ +from __future__ import absolute_import + +import errno +import os +import sys +import signal + +from celery import _find_option_with_arg +from celery import platforms +from celery.five import open_fqdn +from celery.platforms import ( + get_fdmax, + ignore_errno, + set_process_title, + signals, + maybe_drop_privileges, + setuid, + setgid, + initgroups, + parse_uid, + parse_gid, + detached, + DaemonContext, + create_pidlock, + Pidfile, + LockFailed, + setgroups, + _setgroups_hack, + close_open_fds, +) + +try: + import resource +except ImportError: # pragma: no cover + resource = None # noqa + +from celery.tests.case import ( + Case, WhateverIO, Mock, SkipTest, + call, override_stdouts, mock_open, patch, +) + + +class test_find_option_with_arg(Case): + + def test_long_opt(self): + self.assertEqual( + _find_option_with_arg(['--foo=bar'], long_opts=['--foo']), + 'bar' + ) + + def test_short_opt(self): + self.assertEqual( + _find_option_with_arg(['-f', 'bar'], short_opts=['-f']), + 'bar' + ) + + +class test_close_open_fds(Case): + + def test_closes(self): + with patch('os.close') as _close: + with patch('os.closerange', create=True) as closerange: + with patch('celery.platforms.get_fdmax') as fdmax: + fdmax.return_value = 3 + close_open_fds() + if not closerange.called: + _close.assert_has_calls([call(2), call(1), call(0)]) + _close.side_effect = OSError() + _close.side_effect.errno = errno.EBADF + close_open_fds() + + +class test_ignore_errno(Case): + + def test_raises_EBADF(self): + with ignore_errno('EBADF'): + exc = OSError() + exc.errno = errno.EBADF + raise exc + + def test_otherwise(self): + with self.assertRaises(OSError): + with ignore_errno('EBADF'): + exc = OSError() + exc.errno = errno.ENOENT + raise exc + + +class test_set_process_title(Case): + + def when_no_setps(self): + prev = platforms._setproctitle = platforms._setproctitle, None + try: + set_process_title('foo') + finally: + platforms._setproctitle = prev + + +class test_Signals(Case): + + @patch('signal.getsignal') + def test_getitem(self, getsignal): + signals['SIGINT'] + getsignal.assert_called_with(signal.SIGINT) + + def test_supported(self): + self.assertTrue(signals.supported('INT')) + self.assertFalse(signals.supported('SIGIMAGINARY')) + + def test_reset_alarm(self): + if sys.platform == 'win32': + raise SkipTest('signal.alarm not available on Windows') + with patch('signal.alarm') as _alarm: + signals.reset_alarm() + _alarm.assert_called_with(0) + + def test_arm_alarm(self): + if hasattr(signal, 'setitimer'): + with patch('signal.setitimer', create=True) as seti: + signals.arm_alarm(30) + self.assertTrue(seti.called) + + def test_signum(self): + self.assertEqual(signals.signum(13), 13) + self.assertEqual(signals.signum('INT'), signal.SIGINT) + self.assertEqual(signals.signum('SIGINT'), signal.SIGINT) + with self.assertRaises(TypeError): + signals.signum('int') + signals.signum(object()) + + @patch('signal.signal') + def test_ignore(self, set): + signals.ignore('SIGINT') + set.assert_called_with(signals.signum('INT'), signals.ignored) + signals.ignore('SIGTERM') + set.assert_called_with(signals.signum('TERM'), signals.ignored) + + @patch('signal.signal') + def test_setitem(self, set): + handle = lambda *a: a + signals['INT'] = handle + set.assert_called_with(signal.SIGINT, handle) + + @patch('signal.signal') + def test_setitem_raises(self, set): + set.side_effect = ValueError() + signals['INT'] = lambda *a: a + + +if not platforms.IS_WINDOWS: + + class test_get_fdmax(Case): + + @patch('resource.getrlimit') + def 
test_when_infinity(self, getrlimit): + with patch('os.sysconf') as sysconfig: + sysconfig.side_effect = KeyError() + getrlimit.return_value = [None, resource.RLIM_INFINITY] + default = object() + self.assertIs(get_fdmax(default), default) + + @patch('resource.getrlimit') + def test_when_actual(self, getrlimit): + with patch('os.sysconf') as sysconfig: + sysconfig.side_effect = KeyError() + getrlimit.return_value = [None, 13] + self.assertEqual(get_fdmax(None), 13) + + class test_maybe_drop_privileges(Case): + + @patch('celery.platforms.parse_uid') + @patch('pwd.getpwuid') + @patch('celery.platforms.setgid') + @patch('celery.platforms.setuid') + @patch('celery.platforms.initgroups') + def test_with_uid(self, initgroups, setuid, setgid, + getpwuid, parse_uid): + + class pw_struct(object): + pw_gid = 50001 + + def raise_on_second_call(*args, **kwargs): + setuid.side_effect = OSError() + setuid.side_effect.errno = errno.EPERM + setuid.side_effect = raise_on_second_call + getpwuid.return_value = pw_struct() + parse_uid.return_value = 5001 + maybe_drop_privileges(uid='user') + parse_uid.assert_called_with('user') + getpwuid.assert_called_with(5001) + setgid.assert_called_with(50001) + initgroups.assert_called_with(5001, 50001) + setuid.assert_has_calls([call(5001), call(0)]) + + @patch('celery.platforms.parse_uid') + @patch('celery.platforms.parse_gid') + @patch('celery.platforms.setgid') + @patch('celery.platforms.setuid') + @patch('celery.platforms.initgroups') + def test_with_guid(self, initgroups, setuid, setgid, + parse_gid, parse_uid): + + def raise_on_second_call(*args, **kwargs): + setuid.side_effect = OSError() + setuid.side_effect.errno = errno.EPERM + setuid.side_effect = raise_on_second_call + parse_uid.return_value = 5001 + parse_gid.return_value = 50001 + maybe_drop_privileges(uid='user', gid='group') + parse_uid.assert_called_with('user') + parse_gid.assert_called_with('group') + setgid.assert_called_with(50001) + initgroups.assert_called_with(5001, 50001) + setuid.assert_has_calls([call(5001), call(0)]) + + setuid.side_effect = None + with self.assertRaises(RuntimeError): + maybe_drop_privileges(uid='user', gid='group') + setuid.side_effect = OSError() + setuid.side_effect.errno = errno.EINVAL + with self.assertRaises(OSError): + maybe_drop_privileges(uid='user', gid='group') + + @patch('celery.platforms.setuid') + @patch('celery.platforms.setgid') + @patch('celery.platforms.parse_gid') + def test_only_gid(self, parse_gid, setgid, setuid): + parse_gid.return_value = 50001 + maybe_drop_privileges(gid='group') + parse_gid.assert_called_with('group') + setgid.assert_called_with(50001) + self.assertFalse(setuid.called) + + class test_setget_uid_gid(Case): + + @patch('celery.platforms.parse_uid') + @patch('os.setuid') + def test_setuid(self, _setuid, parse_uid): + parse_uid.return_value = 5001 + setuid('user') + parse_uid.assert_called_with('user') + _setuid.assert_called_with(5001) + + @patch('celery.platforms.parse_gid') + @patch('os.setgid') + def test_setgid(self, _setgid, parse_gid): + parse_gid.return_value = 50001 + setgid('group') + parse_gid.assert_called_with('group') + _setgid.assert_called_with(50001) + + def test_parse_uid_when_int(self): + self.assertEqual(parse_uid(5001), 5001) + + @patch('pwd.getpwnam') + def test_parse_uid_when_existing_name(self, getpwnam): + + class pwent(object): + pw_uid = 5001 + + getpwnam.return_value = pwent() + self.assertEqual(parse_uid('user'), 5001) + + @patch('pwd.getpwnam') + def test_parse_uid_when_nonexisting_name(self, getpwnam): + 
getpwnam.side_effect = KeyError('user') + + with self.assertRaises(KeyError): + parse_uid('user') + + def test_parse_gid_when_int(self): + self.assertEqual(parse_gid(50001), 50001) + + @patch('grp.getgrnam') + def test_parse_gid_when_existing_name(self, getgrnam): + + class grent(object): + gr_gid = 50001 + + getgrnam.return_value = grent() + self.assertEqual(parse_gid('group'), 50001) + + @patch('grp.getgrnam') + def test_parse_gid_when_nonexisting_name(self, getgrnam): + getgrnam.side_effect = KeyError('group') + + with self.assertRaises(KeyError): + parse_gid('group') + + class test_initgroups(Case): + + @patch('pwd.getpwuid') + @patch('os.initgroups', create=True) + def test_with_initgroups(self, initgroups_, getpwuid): + getpwuid.return_value = ['user'] + initgroups(5001, 50001) + initgroups_.assert_called_with('user', 50001) + + @patch('celery.platforms.setgroups') + @patch('grp.getgrall') + @patch('pwd.getpwuid') + def test_without_initgroups(self, getpwuid, getgrall, setgroups): + prev = getattr(os, 'initgroups', None) + try: + delattr(os, 'initgroups') + except AttributeError: + pass + try: + getpwuid.return_value = ['user'] + + class grent(object): + gr_mem = ['user'] + + def __init__(self, gid): + self.gr_gid = gid + + getgrall.return_value = [grent(1), grent(2), grent(3)] + initgroups(5001, 50001) + setgroups.assert_called_with([1, 2, 3]) + finally: + if prev: + os.initgroups = prev + + class test_detached(Case): + + def test_without_resource(self): + prev, platforms.resource = platforms.resource, None + try: + with self.assertRaises(RuntimeError): + detached() + finally: + platforms.resource = prev + + @patch('celery.platforms._create_pidlock') + @patch('celery.platforms.signals') + @patch('celery.platforms.maybe_drop_privileges') + @patch('os.geteuid') + @patch(open_fqdn) + def test_default(self, open, geteuid, maybe_drop, + signals, pidlock): + geteuid.return_value = 0 + context = detached(uid='user', gid='group') + self.assertIsInstance(context, DaemonContext) + signals.reset.assert_called_with('SIGCLD') + maybe_drop.assert_called_with(uid='user', gid='group') + open.return_value = Mock() + + geteuid.return_value = 5001 + context = detached(uid='user', gid='group', logfile='/foo/bar') + self.assertIsInstance(context, DaemonContext) + self.assertTrue(context.after_chdir) + context.after_chdir() + open.assert_called_with('/foo/bar', 'a') + open.return_value.close.assert_called_with() + + context = detached(pidfile='/foo/bar/pid') + self.assertIsInstance(context, DaemonContext) + self.assertTrue(context.after_chdir) + context.after_chdir() + pidlock.assert_called_with('/foo/bar/pid') + + class test_DaemonContext(Case): + + @patch('os.fork') + @patch('os.setsid') + @patch('os._exit') + @patch('os.chdir') + @patch('os.umask') + @patch('os.close') + @patch('os.closerange') + @patch('os.open') + @patch('os.dup2') + def test_open(self, dup2, open, close, closer, umask, chdir, + _exit, setsid, fork): + x = DaemonContext(workdir='/opt/workdir', umask=0o22) + x.stdfds = [0, 1, 2] + + fork.return_value = 0 + with x: + self.assertTrue(x._is_open) + with x: + pass + self.assertEqual(fork.call_count, 2) + setsid.assert_called_with() + self.assertFalse(_exit.called) + + chdir.assert_called_with(x.workdir) + umask.assert_called_with(0o22) + self.assertTrue(dup2.called) + + fork.reset_mock() + fork.return_value = 1 + x = DaemonContext(workdir='/opt/workdir') + x.stdfds = [0, 1, 2] + with x: + pass + self.assertEqual(fork.call_count, 1) + _exit.assert_called_with(0) + + x = 
DaemonContext(workdir='/opt/workdir', fake=True) + x.stdfds = [0, 1, 2] + x._detach = Mock() + with x: + pass + self.assertFalse(x._detach.called) + + x.after_chdir = Mock() + with x: + pass + x.after_chdir.assert_called_with() + + class test_Pidfile(Case): + + @patch('celery.platforms.Pidfile') + def test_create_pidlock(self, Pidfile): + p = Pidfile.return_value = Mock() + p.is_locked.return_value = True + p.remove_if_stale.return_value = False + with override_stdouts() as (_, err): + with self.assertRaises(SystemExit): + create_pidlock('/var/pid') + self.assertIn('already exists', err.getvalue()) + + p.remove_if_stale.return_value = True + ret = create_pidlock('/var/pid') + self.assertIs(ret, p) + + def test_context(self): + p = Pidfile('/var/pid') + p.write_pid = Mock() + p.remove = Mock() + + with p as _p: + self.assertIs(_p, p) + p.write_pid.assert_called_with() + p.remove.assert_called_with() + + def test_acquire_raises_LockFailed(self): + p = Pidfile('/var/pid') + p.write_pid = Mock() + p.write_pid.side_effect = OSError() + + with self.assertRaises(LockFailed): + with p: + pass + + @patch('os.path.exists') + def test_is_locked(self, exists): + p = Pidfile('/var/pid') + exists.return_value = True + self.assertTrue(p.is_locked()) + exists.return_value = False + self.assertFalse(p.is_locked()) + + def test_read_pid(self): + with mock_open() as s: + s.write('1816\n') + s.seek(0) + p = Pidfile('/var/pid') + self.assertEqual(p.read_pid(), 1816) + + def test_read_pid_partially_written(self): + with mock_open() as s: + s.write('1816') + s.seek(0) + p = Pidfile('/var/pid') + with self.assertRaises(ValueError): + p.read_pid() + + def test_read_pid_raises_ENOENT(self): + exc = IOError() + exc.errno = errno.ENOENT + with mock_open(side_effect=exc): + p = Pidfile('/var/pid') + self.assertIsNone(p.read_pid()) + + def test_read_pid_raises_IOError(self): + exc = IOError() + exc.errno = errno.EAGAIN + with mock_open(side_effect=exc): + p = Pidfile('/var/pid') + with self.assertRaises(IOError): + p.read_pid() + + def test_read_pid_bogus_pidfile(self): + with mock_open() as s: + s.write('eighteensixteen\n') + s.seek(0) + p = Pidfile('/var/pid') + with self.assertRaises(ValueError): + p.read_pid() + + @patch('os.unlink') + def test_remove(self, unlink): + unlink.return_value = True + p = Pidfile('/var/pid') + p.remove() + unlink.assert_called_with(p.path) + + @patch('os.unlink') + def test_remove_ENOENT(self, unlink): + exc = OSError() + exc.errno = errno.ENOENT + unlink.side_effect = exc + p = Pidfile('/var/pid') + p.remove() + unlink.assert_called_with(p.path) + + @patch('os.unlink') + def test_remove_EACCES(self, unlink): + exc = OSError() + exc.errno = errno.EACCES + unlink.side_effect = exc + p = Pidfile('/var/pid') + p.remove() + unlink.assert_called_with(p.path) + + @patch('os.unlink') + def test_remove_OSError(self, unlink): + exc = OSError() + exc.errno = errno.EAGAIN + unlink.side_effect = exc + p = Pidfile('/var/pid') + with self.assertRaises(OSError): + p.remove() + unlink.assert_called_with(p.path) + + @patch('os.kill') + def test_remove_if_stale_process_alive(self, kill): + p = Pidfile('/var/pid') + p.read_pid = Mock() + p.read_pid.return_value = 1816 + kill.return_value = 0 + self.assertFalse(p.remove_if_stale()) + kill.assert_called_with(1816, 0) + p.read_pid.assert_called_with() + + kill.side_effect = OSError() + kill.side_effect.errno = errno.ENOENT + self.assertFalse(p.remove_if_stale()) + + @patch('os.kill') + def test_remove_if_stale_process_dead(self, kill): + with 
override_stdouts(): + p = Pidfile('/var/pid') + p.read_pid = Mock() + p.read_pid.return_value = 1816 + p.remove = Mock() + exc = OSError() + exc.errno = errno.ESRCH + kill.side_effect = exc + self.assertTrue(p.remove_if_stale()) + kill.assert_called_with(1816, 0) + p.remove.assert_called_with() + + def test_remove_if_stale_broken_pid(self): + with override_stdouts(): + p = Pidfile('/var/pid') + p.read_pid = Mock() + p.read_pid.side_effect = ValueError() + p.remove = Mock() + + self.assertTrue(p.remove_if_stale()) + p.remove.assert_called_with() + + def test_remove_if_stale_no_pidfile(self): + p = Pidfile('/var/pid') + p.read_pid = Mock() + p.read_pid.return_value = None + p.remove = Mock() + + self.assertTrue(p.remove_if_stale()) + p.remove.assert_called_with() + + @patch('os.fsync') + @patch('os.getpid') + @patch('os.open') + @patch('os.fdopen') + @patch(open_fqdn) + def test_write_pid(self, open_, fdopen, osopen, getpid, fsync): + getpid.return_value = 1816 + osopen.return_value = 13 + w = fdopen.return_value = WhateverIO() + w.close = Mock() + r = open_.return_value = WhateverIO() + r.write('1816\n') + r.seek(0) + + p = Pidfile('/var/pid') + p.write_pid() + w.seek(0) + self.assertEqual(w.readline(), '1816\n') + self.assertTrue(w.close.called) + getpid.assert_called_with() + osopen.assert_called_with(p.path, platforms.PIDFILE_FLAGS, + platforms.PIDFILE_MODE) + fdopen.assert_called_with(13, 'w') + fsync.assert_called_with(13) + open_.assert_called_with(p.path) + + @patch('os.fsync') + @patch('os.getpid') + @patch('os.open') + @patch('os.fdopen') + @patch(open_fqdn) + def test_write_reread_fails(self, open_, fdopen, + osopen, getpid, fsync): + getpid.return_value = 1816 + osopen.return_value = 13 + w = fdopen.return_value = WhateverIO() + w.close = Mock() + r = open_.return_value = WhateverIO() + r.write('11816\n') + r.seek(0) + + p = Pidfile('/var/pid') + with self.assertRaises(LockFailed): + p.write_pid() + + class test_setgroups(Case): + + @patch('os.setgroups', create=True) + def test_setgroups_hack_ValueError(self, setgroups): + + def on_setgroups(groups): + if len(groups) <= 200: + setgroups.return_value = True + return + raise ValueError() + setgroups.side_effect = on_setgroups + _setgroups_hack(list(range(400))) + + setgroups.side_effect = ValueError() + with self.assertRaises(ValueError): + _setgroups_hack(list(range(400))) + + @patch('os.setgroups', create=True) + def test_setgroups_hack_OSError(self, setgroups): + exc = OSError() + exc.errno = errno.EINVAL + + def on_setgroups(groups): + if len(groups) <= 200: + setgroups.return_value = True + return + raise exc + setgroups.side_effect = on_setgroups + + _setgroups_hack(list(range(400))) + + setgroups.side_effect = exc + with self.assertRaises(OSError): + _setgroups_hack(list(range(400))) + + exc2 = OSError() + exc2.errno = errno.ESRCH + setgroups.side_effect = exc2 + with self.assertRaises(OSError): + _setgroups_hack(list(range(400))) + + @patch('os.sysconf') + @patch('celery.platforms._setgroups_hack') + def test_setgroups(self, hack, sysconf): + sysconf.return_value = 100 + setgroups(list(range(400))) + hack.assert_called_with(list(range(100))) + + @patch('os.sysconf') + @patch('celery.platforms._setgroups_hack') + def test_setgroups_sysconf_raises(self, hack, sysconf): + sysconf.side_effect = ValueError() + setgroups(list(range(400))) + hack.assert_called_with(list(range(400))) + + @patch('os.getgroups') + @patch('os.sysconf') + @patch('celery.platforms._setgroups_hack') + def test_setgroups_raises_ESRCH(self, hack, sysconf,
getgroups): + sysconf.side_effect = ValueError() + esrch = OSError() + esrch.errno = errno.ESRCH + hack.side_effect = esrch + with self.assertRaises(OSError): + setgroups(list(range(400))) + + @patch('os.getgroups') + @patch('os.sysconf') + @patch('celery.platforms._setgroups_hack') + def test_setgroups_raises_EPERM(self, hack, sysconf, getgroups): + sysconf.side_effect = ValueError() + eperm = OSError() + eperm.errno = errno.EPERM + hack.side_effect = eperm + getgroups.return_value = list(range(400)) + setgroups(list(range(400))) + getgroups.assert_called_with() + + getgroups.return_value = [1000] + with self.assertRaises(OSError): + setgroups(list(range(400))) + getgroups.assert_called_with() diff --git a/celery/tests/utils/test_saferef.py b/celery/tests/utils/test_saferef.py new file mode 100644 index 0000000..9c18d71 --- /dev/null +++ b/celery/tests/utils/test_saferef.py @@ -0,0 +1,94 @@ +from __future__ import absolute_import + +from celery.five import range +from celery.utils.dispatch.saferef import safe_ref +from celery.tests.case import Case + + +class Class1(object): + + def x(self): + pass + + +def fun(obj): + pass + + +class Class2(object): + + def __call__(self, obj): + pass + + +class SaferefTests(Case): + + def setUp(self): + ts = [] + ss = [] + for x in range(5000): + t = Class1() + ts.append(t) + s = safe_ref(t.x, self._closure) + ss.append(s) + ts.append(fun) + ss.append(safe_ref(fun, self._closure)) + for x in range(30): + t = Class2() + ts.append(t) + s = safe_ref(t, self._closure) + ss.append(s) + self.ts = ts + self.ss = ss + self.closureCount = 0 + + def tearDown(self): + del self.ts + del self.ss + + def test_in(self): + """test_in + + Test the "in" operator for safe references (cmp) + + """ + for t in self.ts[:50]: + self.assertTrue(safe_ref(t.x) in self.ss) + + def test_valid(self): + """test_value + + Test that the references are valid (return instance methods) + + """ + for s in self.ss: + self.assertTrue(s()) + + def test_shortcircuit(self): + """test_shortcircuit + + Test that creation short-circuits to reuse existing references + + """ + sd = {} + for s in self.ss: + sd[s] = 1 + for t in self.ts: + if hasattr(t, 'x'): + self.assertIn(safe_ref(t.x), sd) + else: + self.assertIn(safe_ref(t), sd) + + def test_representation(self): + """test_representation + + Test that the reference object's representation works + + XXX Doesn't currently check the results, just that no error + is raised + """ + repr(self.ss[-1]) + + def _closure(self, ref): + """Dumb utility mechanism to increment deletion counter""" + self.closureCount += 1 diff --git a/celery/tests/utils/test_serialization.py b/celery/tests/utils/test_serialization.py new file mode 100644 index 0000000..53dfdad --- /dev/null +++ b/celery/tests/utils/test_serialization.py @@ -0,0 +1,42 @@ +from __future__ import absolute_import + +import sys + +from celery.utils.serialization import ( + UnpickleableExceptionWrapper, + get_pickleable_etype, +) + +from celery.tests.case import Case, mask_modules + + +class test_AAPickle(Case): + + def test_no_cpickle(self): + prev = sys.modules.pop('celery.utils.serialization', None) + try: + with mask_modules('cPickle'): + from celery.utils.serialization import pickle + import pickle as orig_pickle + self.assertIs(pickle.dumps, orig_pickle.dumps) + finally: + sys.modules['celery.utils.serialization'] = prev + + +class test_UnpickleExceptionWrapper(Case): + + def test_init(self): + x = UnpickleableExceptionWrapper('foo', 'Bar', [10, lambda x: x]) + self.assertTrue(x.exc_args) + 
self.assertEqual(len(x.exc_args), 2) + + +class test_get_pickleable_etype(Case): + + def test_get_pickleable_etype(self): + + class Unpickleable(Exception): + def __reduce__(self): + raise ValueError('foo') + + self.assertIs(get_pickleable_etype(Unpickleable), Exception) diff --git a/celery/tests/utils/test_sysinfo.py b/celery/tests/utils/test_sysinfo.py new file mode 100644 index 0000000..4cd32c7 --- /dev/null +++ b/celery/tests/utils/test_sysinfo.py @@ -0,0 +1,33 @@ +from __future__ import absolute_import + +import os + +from celery.utils.sysinfo import load_average, df + +from celery.tests.case import Case, SkipTest, patch + + +class test_load_average(Case): + + def test_avg(self): + if not hasattr(os, 'getloadavg'): + raise SkipTest('getloadavg not available') + with patch('os.getloadavg') as getloadavg: + getloadavg.return_value = 0.54736328125, 0.6357421875, 0.69921875 + l = load_average() + self.assertTrue(l) + self.assertEqual(l, (0.55, 0.64, 0.7)) + + +class test_df(Case): + + def test_df(self): + try: + from posix import statvfs_result # noqa + except ImportError: + raise SkipTest('statvfs not available') + x = df('/') + self.assertTrue(x.total_blocks) + self.assertTrue(x.available) + self.assertTrue(x.capacity) + self.assertTrue(x.stat) diff --git a/celery/tests/utils/test_term.py b/celery/tests/utils/test_term.py new file mode 100644 index 0000000..1bd7e43 --- /dev/null +++ b/celery/tests/utils/test_term.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals + +import sys + +from celery.utils import term +from celery.utils.term import colored, fg +from celery.five import text_t + +from celery.tests.case import Case, SkipTest + + +class test_colored(Case): + + def setUp(self): + if sys.platform == 'win32': + raise SkipTest('Colors not supported on Windows') + + self._prev_encoding = sys.getdefaultencoding + + def getdefaultencoding(): + return 'utf-8' + + sys.getdefaultencoding = getdefaultencoding + + def tearDown(self): + sys.getdefaultencoding = self._prev_encoding + + def test_colors(self): + colors = ( + ('black', term.BLACK), + ('red', term.RED), + ('green', term.GREEN), + ('yellow', term.YELLOW), + ('blue', term.BLUE), + ('magenta', term.MAGENTA), + ('cyan', term.CYAN), + ('white', term.WHITE), + ) + + for name, key in colors: + self.assertIn(fg(30 + key), str(colored().names[name]('foo'))) + + self.assertTrue(str(colored().bold('f'))) + self.assertTrue(str(colored().underline('f'))) + self.assertTrue(str(colored().blink('f'))) + self.assertTrue(str(colored().reverse('f'))) + self.assertTrue(str(colored().bright('f'))) + self.assertTrue(str(colored().ired('f'))) + self.assertTrue(str(colored().igreen('f'))) + self.assertTrue(str(colored().iyellow('f'))) + self.assertTrue(str(colored().iblue('f'))) + self.assertTrue(str(colored().imagenta('f'))) + self.assertTrue(str(colored().icyan('f'))) + self.assertTrue(str(colored().iwhite('f'))) + self.assertTrue(str(colored().reset('f'))) + + self.assertTrue(text_t(colored().green('∂bar'))) + + self.assertTrue( + colored().red('éefoo') + colored().green('∂bar')) + + self.assertEqual( + colored().red('foo').no_color(), 'foo') + + self.assertTrue( + repr(colored().blue('åfoo'))) + + self.assertIn("''", repr(colored())) + + c = colored() + s = c.red('foo', c.blue('bar'), c.green('baz')) + self.assertTrue(s.no_color()) + + c._fold_no_color(s, 'øfoo') + c._fold_no_color('fooå', s) + + c = colored().red('åfoo') + self.assertEqual( + c._add(c, 'baræ'), + '\x1b[1;31m\xe5foo\x1b[0mbar\xe6', + ) 
+ + c2 = colored().blue('ƒƒz') + c3 = c._add(c, c2) + self.assertEqual( + c3, + '\x1b[1;31m\xe5foo\x1b[0m\x1b[1;34m\u0192\u0192z\x1b[0m', + ) diff --git a/celery/tests/utils/test_text.py b/celery/tests/utils/test_text.py new file mode 100644 index 0000000..383bdb6 --- /dev/null +++ b/celery/tests/utils/test_text.py @@ -0,0 +1,88 @@ +from __future__ import absolute_import + +from celery.utils.text import ( + indent, + ensure_2lines, + abbr, + truncate, + abbrtask, + pretty, +) +from celery.tests.case import AppCase, Case + +RANDTEXT = """\ +The quick brown +fox jumps +over the +lazy dog\ +""" + +RANDTEXT_RES = """\ + The quick brown + fox jumps + over the + lazy dog\ +""" + +QUEUES = { + 'queue1': { + 'exchange': 'exchange1', + 'exchange_type': 'type1', + 'routing_key': 'bind1', + }, + 'queue2': { + 'exchange': 'exchange2', + 'exchange_type': 'type2', + 'routing_key': 'bind2', + }, +} + + +QUEUE_FORMAT1 = '.> queue1 exchange=exchange1(type1) key=bind1' +QUEUE_FORMAT2 = '.> queue2 exchange=exchange2(type2) key=bind2' + + +class test_Info(AppCase): + + def test_textindent(self): + self.assertEqual(indent(RANDTEXT, 4), RANDTEXT_RES) + + def test_format_queues(self): + self.app.amqp.queues = self.app.amqp.Queues(QUEUES) + self.assertEqual(sorted(self.app.amqp.queues.format().split('\n')), + sorted([QUEUE_FORMAT1, QUEUE_FORMAT2])) + + def test_ensure_2lines(self): + self.assertEqual( + len(ensure_2lines('foo\nbar\nbaz\n').splitlines()), 3, + ) + self.assertEqual( + len(ensure_2lines('foo\nbar').splitlines()), 2, + ) + + +class test_utils(Case): + + def test_truncate_text(self): + self.assertEqual(truncate('ABCDEFGHI', 3), 'ABC...') + self.assertEqual(truncate('ABCDEFGHI', 10), 'ABCDEFGHI') + + def test_abbr(self): + self.assertEqual(abbr(None, 3), '???') + self.assertEqual(abbr('ABCDEFGHI', 6), 'ABC...') + self.assertEqual(abbr('ABCDEFGHI', 20), 'ABCDEFGHI') + self.assertEqual(abbr('ABCDEFGHI', 6, None), 'ABCDEF') + + def test_abbrtask(self): + self.assertEqual(abbrtask(None, 3), '???') + self.assertEqual( + abbrtask('feeds.tasks.refresh', 10), + '[.]refresh', + ) + self.assertEqual( + abbrtask('feeds.tasks.refresh', 30), + 'feeds.tasks.refresh', + ) + + def test_pretty(self): + self.assertTrue(pretty(('a', 'b', 'c'))) diff --git a/celery/tests/utils/test_threads.py b/celery/tests/utils/test_threads.py new file mode 100644 index 0000000..4c85b23 --- /dev/null +++ b/celery/tests/utils/test_threads.py @@ -0,0 +1,107 @@ +from __future__ import absolute_import + +from celery.utils.threads import ( + _LocalStack, + _FastLocalStack, + LocalManager, + Local, + bgThread, +) + +from celery.tests.case import Case, override_stdouts, patch + + +class test_bgThread(Case): + + def test_crash(self): + + class T(bgThread): + + def body(self): + raise KeyError() + + with patch('os._exit') as _exit: + with override_stdouts(): + _exit.side_effect = ValueError() + t = T() + with self.assertRaises(ValueError): + t.run() + _exit.assert_called_with(1) + + def test_interface(self): + x = bgThread() + with self.assertRaises(NotImplementedError): + x.body() + + +class test_Local(Case): + + def test_iter(self): + x = Local() + x.foo = 'bar' + ident = x.__ident_func__() + self.assertIn((ident, {'foo': 'bar'}), list(iter(x))) + + delattr(x, 'foo') + self.assertNotIn((ident, {'foo': 'bar'}), list(iter(x))) + with self.assertRaises(AttributeError): + delattr(x, 'foo') + + self.assertIsNotNone(x(lambda: 'foo')) + + +class test_LocalStack(Case): + + def test_stack(self): + x = _LocalStack() + self.assertIsNone(x.pop()) + 
x.__release_local__() + ident = x.__ident_func__ + x.__ident_func__ = ident + + with self.assertRaises(RuntimeError): + x()[0] + + x.push(['foo']) + self.assertEqual(x()[0], 'foo') + x.pop() + with self.assertRaises(RuntimeError): + x()[0] + + +class test_FastLocalStack(Case): + + def test_stack(self): + x = _FastLocalStack() + x.push(['foo']) + x.push(['bar']) + self.assertEqual(x.top, ['bar']) + self.assertEqual(len(x), 2) + x.pop() + self.assertEqual(x.top, ['foo']) + x.pop() + self.assertIsNone(x.top) + + +class test_LocalManager(Case): + + def test_init(self): + x = LocalManager() + self.assertListEqual(x.locals, []) + self.assertTrue(x.ident_func) + + ident = lambda: 1 + loc = Local() + x = LocalManager([loc], ident_func=ident) + self.assertListEqual(x.locals, [loc]) + x = LocalManager(loc, ident_func=ident) + self.assertListEqual(x.locals, [loc]) + self.assertIs(x.ident_func, ident) + self.assertIs(x.locals[0].__ident_func__, ident) + self.assertEqual(x.get_ident(), 1) + + with patch('celery.utils.threads.release_local') as release: + x.cleanup() + release.assert_called_with(loc) + + self.assertTrue(repr(x)) diff --git a/celery/tests/utils/test_timer2.py b/celery/tests/utils/test_timer2.py new file mode 100644 index 0000000..cb18c21 --- /dev/null +++ b/celery/tests/utils/test_timer2.py @@ -0,0 +1,187 @@ +from __future__ import absolute_import + +import sys +import time + +import celery.utils.timer2 as timer2 + +from celery.tests.case import Case, Mock, patch +from kombu.tests.case import redirect_stdouts + + +class test_Entry(Case): + + def test_call(self): + scratch = [None] + + def timed(x, y, moo='foo'): + scratch[0] = (x, y, moo) + + tref = timer2.Entry(timed, (4, 4), {'moo': 'baz'}) + tref() + + self.assertTupleEqual(scratch[0], (4, 4, 'baz')) + + def test_cancel(self): + tref = timer2.Entry(lambda x: x, (1, ), {}) + tref.cancel() + self.assertTrue(tref.cancelled) + + def test_repr(self): + tref = timer2.Entry(lambda x: x(1, ), {}) + self.assertTrue(repr(tref)) + + +class test_Schedule(Case): + + def test_supports_Timer_interface(self): + x = timer2.Schedule() + x.stop() + + tref = Mock() + x.cancel(tref) + tref.cancel.assert_called_with() + + self.assertIs(x.schedule, x) + + def test_handle_error(self): + from datetime import datetime + scratch = [None] + + def on_error(exc_info): + scratch[0] = exc_info + + s = timer2.Schedule(on_error=on_error) + + with patch('kombu.async.timer.to_timestamp') as tot: + tot.side_effect = OverflowError() + s.enter_at(timer2.Entry(lambda: None, (), {}), + eta=datetime.now()) + s.enter_at(timer2.Entry(lambda: None, (), {}), eta=None) + s.on_error = None + with self.assertRaises(OverflowError): + s.enter_at(timer2.Entry(lambda: None, (), {}), + eta=datetime.now()) + exc = scratch[0] + self.assertIsInstance(exc, OverflowError) + + +class test_Timer(Case): + + def test_enter_after(self): + t = timer2.Timer() + try: + done = [False] + + def set_done(): + done[0] = True + + t.call_after(0.3, set_done) + mss = 0 + while not done[0]: + if mss >= 2.0: + raise Exception('test timed out') + time.sleep(0.1) + mss += 0.1 + finally: + t.stop() + + def test_exit_after(self): + t = timer2.Timer() + t.call_after = Mock() + t.exit_after(0.3, priority=10) + t.call_after.assert_called_with(0.3, sys.exit, 10) + + def test_ensure_started_not_started(self): + t = timer2.Timer() + t.running = True + t.start = Mock() + t.ensure_started() + self.assertFalse(t.start.called) + + def test_call_repeatedly(self): + t = timer2.Timer() + try: + t.schedule.enter_after = Mock() 
+ + myfun = Mock() + myfun.__name__ = 'myfun' + t.call_repeatedly(0.03, myfun) + + self.assertEqual(t.schedule.enter_after.call_count, 1) + args1, _ = t.schedule.enter_after.call_args_list[0] + sec1, tref1, _ = args1 + self.assertEqual(sec1, 0.03) + tref1() + + self.assertEqual(t.schedule.enter_after.call_count, 2) + args2, _ = t.schedule.enter_after.call_args_list[1] + sec2, tref2, _ = args2 + self.assertEqual(sec2, 0.03) + tref2.cancelled = True + tref2() + + self.assertEqual(t.schedule.enter_after.call_count, 2) + finally: + t.stop() + + @patch('kombu.async.timer.logger') + def test_apply_entry_error_handled(self, logger): + t = timer2.Timer() + t.schedule.on_error = None + + fun = Mock() + fun.side_effect = ValueError() + + t.schedule.apply_entry(fun) + self.assertTrue(logger.error.called) + + @redirect_stdouts + def test_apply_entry_error_not_handled(self, stdout, stderr): + t = timer2.Timer() + t.schedule.on_error = Mock() + + fun = Mock() + fun.side_effect = ValueError() + t.schedule.apply_entry(fun) + fun.assert_called_with() + self.assertFalse(stderr.getvalue()) + + @patch('os._exit') + def test_thread_crash(self, _exit): + t = timer2.Timer() + t._next_entry = Mock() + t._next_entry.side_effect = OSError(131) + t.run() + _exit.assert_called_with(1) + + def test_gc_race_lost(self): + t = timer2.Timer() + t._is_stopped.set = Mock() + t._is_stopped.set.side_effect = TypeError() + + t._is_shutdown.set() + t.run() + t._is_stopped.set.assert_called_with() + + def test_to_timestamp(self): + self.assertIs(timer2.to_timestamp(3.13), 3.13) + + def test_test_enter(self): + t = timer2.Timer() + t._do_enter = Mock() + e = Mock() + t.enter(e, 13, 0) + t._do_enter.assert_called_with('enter_at', e, 13, priority=0) + + def test_test_enter_after(self): + t = timer2.Timer() + t._do_enter = Mock() + t.enter_after() + t._do_enter.assert_called_with('enter_after') + + def test_cancel(self): + t = timer2.Timer() + tref = Mock() + t.cancel(tref) + tref.cancel.assert_called_with() diff --git a/celery/tests/utils/test_timeutils.py b/celery/tests/utils/test_timeutils.py new file mode 100644 index 0000000..2258d06 --- /dev/null +++ b/celery/tests/utils/test_timeutils.py @@ -0,0 +1,267 @@ +from __future__ import absolute_import + +import pytz + +from datetime import datetime, timedelta, tzinfo +from pytz import AmbiguousTimeError + +from celery.utils.timeutils import ( + delta_resolution, + humanize_seconds, + maybe_iso8601, + maybe_timedelta, + timedelta_seconds, + timezone, + rate, + remaining, + make_aware, + maybe_make_aware, + localize, + LocalTimezone, + ffwd, + utcoffset, +) +from celery.utils.iso8601 import parse_iso8601 +from celery.tests.case import Case, Mock, patch + + +class test_LocalTimezone(Case): + + def test_daylight(self): + with patch('celery.utils.timeutils._time') as time: + time.timezone = 3600 + time.daylight = False + x = LocalTimezone() + self.assertEqual(x.STDOFFSET, timedelta(seconds=-3600)) + self.assertEqual(x.DSTOFFSET, x.STDOFFSET) + time.daylight = True + time.altzone = 3600 + y = LocalTimezone() + self.assertEqual(y.STDOFFSET, timedelta(seconds=-3600)) + self.assertEqual(y.DSTOFFSET, timedelta(seconds=-3600)) + + self.assertTrue(repr(y)) + + y._isdst = Mock() + y._isdst.return_value = True + self.assertTrue(y.utcoffset(datetime.now())) + self.assertFalse(y.dst(datetime.now())) + y._isdst.return_value = False + self.assertTrue(y.utcoffset(datetime.now())) + self.assertFalse(y.dst(datetime.now())) + + self.assertTrue(y.tzname(datetime.now())) + + +class test_iso8601(Case): + + 
def test_parse_with_timezone(self): + d = datetime.utcnow().replace(tzinfo=pytz.utc) + self.assertEqual(parse_iso8601(d.isoformat()), d) + # 2013-06-07T20:12:51.775877+00:00 + iso = d.isoformat() + iso1 = iso.replace('+00:00', '-01:00') + d1 = parse_iso8601(iso1) + self.assertEqual(d1.tzinfo._minutes, -60) + iso2 = iso.replace('+00:00', '+01:00') + d2 = parse_iso8601(iso2) + self.assertEqual(d2.tzinfo._minutes, +60) + iso3 = iso.replace('+00:00', 'Z') + d3 = parse_iso8601(iso3) + self.assertEqual(d3.tzinfo, pytz.UTC) + + +class test_timeutils(Case): + + def test_delta_resolution(self): + D = delta_resolution + dt = datetime(2010, 3, 30, 11, 50, 58, 41065) + deltamap = ((timedelta(days=2), datetime(2010, 3, 30, 0, 0)), + (timedelta(hours=2), datetime(2010, 3, 30, 11, 0)), + (timedelta(minutes=2), datetime(2010, 3, 30, 11, 50)), + (timedelta(seconds=2), dt)) + for delta, shoulda in deltamap: + self.assertEqual(D(dt, delta), shoulda) + + def test_timedelta_seconds(self): + deltamap = ((timedelta(seconds=1), 1), + (timedelta(seconds=27), 27), + (timedelta(minutes=3), 3 * 60), + (timedelta(hours=4), 4 * 60 * 60), + (timedelta(days=3), 3 * 86400)) + for delta, seconds in deltamap: + self.assertEqual(timedelta_seconds(delta), seconds) + + def test_timedelta_seconds_returns_0_on_negative_time(self): + delta = timedelta(days=-2) + self.assertEqual(timedelta_seconds(delta), 0) + + def test_humanize_seconds(self): + t = ((4 * 60 * 60 * 24, '4.00 days'), + (1 * 60 * 60 * 24, '1.00 day'), + (4 * 60 * 60, '4.00 hours'), + (1 * 60 * 60, '1.00 hour'), + (4 * 60, '4.00 minutes'), + (1 * 60, '1.00 minute'), + (4, '4.00 seconds'), + (1, '1.00 second'), + (4.3567631221, '4.36 seconds'), + (0, 'now')) + + for seconds, human in t: + self.assertEqual(humanize_seconds(seconds), human) + + self.assertEqual(humanize_seconds(4, prefix='about '), + 'about 4.00 seconds') + + def test_maybe_iso8601_datetime(self): + now = datetime.now() + self.assertIs(maybe_iso8601(now), now) + + def test_maybe_timedelta(self): + D = maybe_timedelta + + for i in (30, 30.6): + self.assertEqual(D(i), timedelta(seconds=i)) + + self.assertEqual(D(timedelta(days=2)), timedelta(days=2)) + + def test_remaining_relative(self): + remaining(datetime.utcnow(), timedelta(hours=1), relative=True) + + +class test_timezone(Case): + + def test_get_timezone_with_pytz(self): + self.assertTrue(timezone.get_timezone('UTC')) + + def test_tz_or_local(self): + self.assertEqual(timezone.tz_or_local(), timezone.local) + self.assertTrue(timezone.tz_or_local(timezone.utc)) + + def test_to_local(self): + self.assertTrue( + timezone.to_local(make_aware(datetime.utcnow(), timezone.utc)), + ) + self.assertTrue( + timezone.to_local(datetime.utcnow()) + ) + + def test_to_local_fallback(self): + self.assertTrue( + timezone.to_local_fallback( + make_aware(datetime.utcnow(), timezone.utc)), + ) + self.assertTrue( + timezone.to_local_fallback(datetime.utcnow()) + ) + + +class test_make_aware(Case): + + def test_tz_without_localize(self): + tz = tzinfo() + self.assertFalse(hasattr(tz, 'localize')) + wtz = make_aware(datetime.utcnow(), tz) + self.assertEqual(wtz.tzinfo, tz) + + def test_when_has_localize(self): + + class tzz(tzinfo): + raises = False + + def localize(self, dt, is_dst=None): + self.localized = True + if self.raises and is_dst is None: + self.raised = True + raise AmbiguousTimeError() + return 1 # needed by min() in Python 3 (None not hashable) + + tz = tzz() + make_aware(datetime.utcnow(), tz) + self.assertTrue(tz.localized) + + tz2 = tzz() + tz2.raises = 
True
+        make_aware(datetime.utcnow(), tz2)
+        self.assertTrue(tz2.localized)
+        self.assertTrue(tz2.raised)
+
+    def test_maybe_make_aware(self):
+        aware = datetime.utcnow().replace(tzinfo=timezone.utc)
+        self.assertTrue(maybe_make_aware(aware), timezone.utc)
+        naive = datetime.utcnow()
+        self.assertTrue(maybe_make_aware(naive))
+
+
+class test_localize(Case):
+
+    def test_tz_without_normalize(self):
+        tz = tzinfo()
+        self.assertFalse(hasattr(tz, 'normalize'))
+        self.assertTrue(localize(make_aware(datetime.utcnow(), tz), tz))
+
+    def test_when_has_normalize(self):
+
+        class tzz(tzinfo):
+            raises = None
+
+            def normalize(self, dt, **kwargs):
+                self.normalized = True
+                if self.raises and kwargs and kwargs.get('is_dst') is None:
+                    self.raised = True
+                    raise self.raises
+                return 1  # needed by min() in Python 3 (None not hashable)
+
+        tz = tzz()
+        localize(make_aware(datetime.utcnow(), tz), tz)
+        self.assertTrue(tz.normalized)
+
+        tz2 = tzz()
+        tz2.raises = AmbiguousTimeError()
+        localize(make_aware(datetime.utcnow(), tz2), tz2)
+        self.assertTrue(tz2.normalized)
+        self.assertTrue(tz2.raised)
+
+        tz3 = tzz()
+        tz3.raises = TypeError()
+        localize(make_aware(datetime.utcnow(), tz3), tz3)
+        self.assertTrue(tz3.normalized)
+        self.assertTrue(tz3.raised)
+
+
+class test_rate_limit_string(Case):
+
+    def test_conversion(self):
+        self.assertEqual(rate(999), 999)
+        self.assertEqual(rate(7.5), 7.5)
+        self.assertEqual(rate('2.5/s'), 2.5)
+        self.assertEqual(rate('1456/s'), 1456)
+        self.assertEqual(rate('100/m'),
+                         100 / 60.0)
+        self.assertEqual(rate('10/h'),
+                         10 / 60.0 / 60.0)
+
+        for zero in (0, None, '0', '0/m', '0/h', '0/s', '0.0/s'):
+            self.assertEqual(rate(zero), 0)
+
+
+class test_ffwd(Case):
+
+    def test_repr(self):
+        x = ffwd(year=2012)
+        self.assertTrue(repr(x))
+
+    def test_radd_with_unknown_gives_NotImplemented(self):
+        x = ffwd(year=2012)
+        self.assertEqual(x.__radd__(object()), NotImplemented)
+
+
+class test_utcoffset(Case):
+
+    def test_utcoffset(self):
+        with patch('celery.utils.timeutils._time') as _time:
+            _time.daylight = True
+            self.assertIsNotNone(utcoffset())
+            _time.daylight = False
+            self.assertIsNotNone(utcoffset())
diff --git a/celery/tests/utils/test_utils.py b/celery/tests/utils/test_utils.py
new file mode 100644
index 0000000..2837ad6
--- /dev/null
+++ b/celery/tests/utils/test_utils.py
@@ -0,0 +1,108 @@
+from __future__ import absolute_import
+
+import pytz
+
+from datetime import datetime, date, time, timedelta
+
+from kombu import Queue
+
+from celery.utils import (
+    chunks,
+    is_iterable,
+    cached_property,
+    warn_deprecated,
+    worker_direct,
+    gen_task_name,
+    jsonify,
+)
+from celery.tests.case import Case, Mock, patch
+
+
+def double(x):
+    return x * 2
+
+
+class test_worker_direct(Case):
+
+    def test_returns_if_queue(self):
+        q = Queue('foo')
+        self.assertIs(worker_direct(q), q)
+
+
+class test_gen_task_name(Case):
+
+    def test_no_module(self):
+        app = Mock()
+        app.name = '__main__'
+        self.assertTrue(gen_task_name(app, 'foo', 'axsadaewe'))
+
+
+class test_jsonify(Case):
+
+    def test_simple(self):
+        self.assertTrue(jsonify(Queue('foo')))
+        self.assertTrue(jsonify(['foo', 'bar', 'baz']))
+        self.assertTrue(jsonify({'foo': 'bar'}))
+        self.assertTrue(jsonify(datetime.utcnow()))
+        self.assertTrue(jsonify(datetime.utcnow().replace(tzinfo=pytz.utc)))
+        self.assertTrue(jsonify(datetime.utcnow().replace(microsecond=0)))
+        self.assertTrue(jsonify(date(2012, 1, 1)))
+        self.assertTrue(jsonify(time(hour=1, minute=30)))
+        self.assertTrue(jsonify(time(hour=1, minute=30, microsecond=3)))
+
self.assertTrue(jsonify(timedelta(seconds=30))) + self.assertTrue(jsonify(10)) + self.assertTrue(jsonify(10.3)) + self.assertTrue(jsonify('hello')) + + with self.assertRaises(ValueError): + jsonify(object()) + + +class test_chunks(Case): + + def test_chunks(self): + + # n == 2 + x = chunks(iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 2) + self.assertListEqual( + list(x), + [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10]], + ) + + # n == 3 + x = chunks(iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 3) + self.assertListEqual( + list(x), + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10]], + ) + + # n == 2 (exact) + x = chunks(iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), 2) + self.assertListEqual( + list(x), + [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]], + ) + + +class test_utils(Case): + + def test_is_iterable(self): + for a in 'f', ['f'], ('f', ), {'f': 'f'}: + self.assertTrue(is_iterable(a)) + for b in object(), 1: + self.assertFalse(is_iterable(b)) + + def test_cached_property(self): + + def fun(obj): + return fun.value + + x = cached_property(fun) + self.assertIs(x.__get__(None), x) + self.assertIs(x.__set__(None, None), x) + self.assertIs(x.__delete__(None), x) + + @patch('warnings.warn') + def test_warn_deprecated(self, warn): + warn_deprecated('Foo') + self.assertTrue(warn.called) diff --git a/celery/tests/worker/__init__.py b/celery/tests/worker/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/celery/tests/worker/test_autoreload.py b/celery/tests/worker/test_autoreload.py new file mode 100644 index 0000000..e61b330 --- /dev/null +++ b/celery/tests/worker/test_autoreload.py @@ -0,0 +1,328 @@ +from __future__ import absolute_import + +import errno +import select +import sys + +from time import time + +from celery.worker import autoreload +from celery.worker.autoreload import ( + WorkerComponent, + file_hash, + BaseMonitor, + StatMonitor, + KQueueMonitor, + InotifyMonitor, + default_implementation, + Autoreloader, +) + +from celery.tests.case import AppCase, Case, Mock, SkipTest, patch, mock_open + + +class test_WorkerComponent(AppCase): + + def test_create_threaded(self): + w = Mock() + w.use_eventloop = False + x = WorkerComponent(w) + x.instantiate = Mock() + r = x.create(w) + x.instantiate.assert_called_with(w.autoreloader_cls, w) + self.assertIs(r, w.autoreloader) + + @patch('select.kevent', create=True) + @patch('select.kqueue', create=True) + @patch('kombu.utils.eventio.kqueue') + def test_create_ev(self, kq, kqueue, kevent): + w = Mock() + w.use_eventloop = True + x = WorkerComponent(w) + x.instantiate = Mock() + r = x.create(w) + x.instantiate.assert_called_with(w.autoreloader_cls, w) + x.register_with_event_loop(w, w.hub) + self.assertIsNone(r) + w.hub.on_close.add.assert_called_with( + w.autoreloader.on_event_loop_close, + ) + + +class test_file_hash(Case): + + def test_hash(self): + with mock_open() as a: + a.write('the quick brown fox\n') + a.seek(0) + A = file_hash('foo') + with mock_open() as b: + b.write('the quick brown bar\n') + b.seek(0) + B = file_hash('bar') + self.assertNotEqual(A, B) + + +class test_BaseMonitor(Case): + + def test_start_stop_on_change(self): + x = BaseMonitor(['a', 'b']) + + with self.assertRaises(NotImplementedError): + x.start() + x.stop() + x.on_change([]) + x._on_change = Mock() + x.on_change('foo') + x._on_change.assert_called_with('foo') + + +class test_StatMonitor(Case): + + @patch('os.stat') + def test_start(self, stat): + + class st(object): + st_mtime = time() + stat.return_value = st() + x = StatMonitor(['a', 'b']) + + def on_is_set(): + if 
x.shutdown_event.is_set.call_count > 3: + return True + return False + x.shutdown_event = Mock() + x.shutdown_event.is_set.side_effect = on_is_set + + x.start() + x.shutdown_event = Mock() + stat.side_effect = OSError() + x.start() + + @patch('os.stat') + def test_mtime_stat_raises(self, stat): + stat.side_effect = ValueError() + x = StatMonitor(['a', 'b']) + x._mtime('a') + + +class test_KQueueMonitor(Case): + + @patch('select.kqueue', create=True) + @patch('os.close') + def test_stop(self, close, kqueue): + x = KQueueMonitor(['a', 'b']) + x.poller = Mock() + x.filemap['a'] = 10 + x.stop() + x.poller.close.assert_called_with() + close.assert_called_with(10) + + close.side_effect = OSError() + close.side_effect.errno = errno.EBADF + x.stop() + + def test_register_with_event_loop(self): + from kombu.utils import eventio + if eventio.kqueue is None: + raise SkipTest('version of kombu does not work with pypy') + x = KQueueMonitor(['a', 'b']) + hub = Mock(name='hub') + x.add_events = Mock(name='add_events()') + x.register_with_event_loop(hub) + x.add_events.assert_called_with(x._kq) + self.assertEqual( + x._kq.on_file_change, + x.handle_event, + ) + + def test_on_event_loop_close(self): + x = KQueueMonitor(['a', 'b']) + x.close = Mock() + x._kq = Mock(name='_kq') + x.on_event_loop_close(Mock(name='hub')) + x.close.assert_called_with(x._kq) + + def test_handle_event(self): + x = KQueueMonitor(['a', 'b']) + x.on_change = Mock() + eA = Mock() + eA.ident = 'a' + eB = Mock() + eB.ident = 'b' + x.fdmap = {'a': 'A', 'b': 'B'} + x.handle_event([eA, eB]) + x.on_change.assert_called_with(['A', 'B']) + + @patch('kombu.utils.eventio.kqueue', create=True) + @patch('kombu.utils.eventio.kevent', create=True) + @patch('os.open') + @patch('select.kqueue', create=True) + def test_start(self, _kq, osopen, kevent, kqueue): + from kombu.utils import eventio + prev_poll, eventio.poll = eventio.poll, kqueue + prev = {} + flags = ['KQ_FILTER_VNODE', 'KQ_EV_ADD', 'KQ_EV_ENABLE', + 'KQ_EV_CLEAR', 'KQ_NOTE_WRITE', 'KQ_NOTE_EXTEND'] + for i, flag in enumerate(flags): + prev[flag] = getattr(eventio, flag, None) + if not prev[flag]: + setattr(eventio, flag, i) + try: + kq = kqueue.return_value = Mock() + + class ev(object): + ident = 10 + filter = eventio.KQ_FILTER_VNODE + fflags = eventio.KQ_NOTE_WRITE + kq.control.return_value = [ev()] + x = KQueueMonitor(['a']) + osopen.return_value = 10 + calls = [0] + + def on_is_set(): + calls[0] += 1 + if calls[0] > 2: + return True + return False + x.shutdown_event = Mock() + x.shutdown_event.is_set.side_effect = on_is_set + x.start() + finally: + for flag in flags: + if prev[flag]: + setattr(eventio, flag, prev[flag]) + else: + delattr(eventio, flag) + eventio.poll = prev_poll + + +class test_InotifyMonitor(Case): + + @patch('celery.worker.autoreload.pyinotify') + def test_start(self, inotify): + x = InotifyMonitor(['a']) + inotify.IN_MODIFY = 1 + inotify.IN_ATTRIB = 2 + x.start() + + inotify.WatchManager.side_effect = ValueError() + with self.assertRaises(ValueError): + x.start() + x.stop() + + x._on_change = None + x.process_(Mock()) + x._on_change = Mock() + x.process_(Mock()) + self.assertTrue(x._on_change.called) + + +class test_default_implementation(Case): + + @patch('select.kqueue', create=True) + @patch('kombu.utils.eventio.kqueue', create=True) + def test_kqueue(self, kq, kqueue): + self.assertEqual(default_implementation(), 'kqueue') + + @patch('celery.worker.autoreload.pyinotify') + def test_inotify(self, pyinotify): + kq = getattr(select, 'kqueue', None) + try: + 
delattr(select, 'kqueue') + except AttributeError: + pass + platform, sys.platform = sys.platform, 'linux' + try: + self.assertEqual(default_implementation(), 'inotify') + ino, autoreload.pyinotify = autoreload.pyinotify, None + try: + self.assertEqual(default_implementation(), 'stat') + finally: + autoreload.pyinotify = ino + finally: + if kq: + select.kqueue = kq + sys.platform = platform + + +class test_Autoreloader(AppCase): + + def test_register_with_event_loop(self): + x = Autoreloader(Mock(), modules=[__name__]) + hub = Mock() + x._monitor = None + x.on_init = Mock() + + def se(*args, **kwargs): + x._monitor = Mock() + x.on_init.side_effect = se + + x.register_with_event_loop(hub) + x.on_init.assert_called_with() + x._monitor.register_with_event_loop.assert_called_with(hub) + + x._monitor.register_with_event_loop.reset_mock() + x.register_with_event_loop(hub) + x._monitor.register_with_event_loop.assert_called_with(hub) + + def test_on_event_loop_close(self): + x = Autoreloader(Mock(), modules=[__name__]) + hub = Mock() + x._monitor = Mock() + x.on_event_loop_close(hub) + x._monitor.on_event_loop_close.assert_called_with(hub) + x._monitor = None + x.on_event_loop_close(hub) + + @patch('celery.worker.autoreload.file_hash') + def test_start(self, fhash): + x = Autoreloader(Mock(), modules=[__name__]) + x.Monitor = Mock() + mon = x.Monitor.return_value = Mock() + mon.start.side_effect = OSError() + mon.start.side_effect.errno = errno.EINTR + x.body() + mon.start.side_effect.errno = errno.ENOENT + with self.assertRaises(OSError): + x.body() + mon.start.side_effect = None + x.body() + + @patch('celery.worker.autoreload.file_hash') + @patch('os.path.exists') + def test_maybe_modified(self, exists, fhash): + exists.return_value = True + fhash.return_value = 'abcd' + x = Autoreloader(Mock(), modules=[__name__]) + x._hashes = {} + x._hashes[__name__] = 'dcba' + self.assertTrue(x._maybe_modified(__name__)) + x._hashes[__name__] = 'abcd' + self.assertFalse(x._maybe_modified(__name__)) + exists.return_value = False + self.assertFalse(x._maybe_modified(__name__)) + + def test_on_change(self): + x = Autoreloader(Mock(), modules=[__name__]) + mm = x._maybe_modified = Mock(0) + mm.return_value = True + x._reload = Mock() + x.file_to_module[__name__] = __name__ + x.on_change([__name__]) + self.assertTrue(x._reload.called) + mm.return_value = False + x.on_change([__name__]) + + def test_reload(self): + x = Autoreloader(Mock(), modules=[__name__]) + x._reload([__name__]) + x.controller.reload.assert_called_with([__name__], reload=True) + + def test_stop(self): + x = Autoreloader(Mock(), modules=[__name__]) + x._monitor = None + x.stop() + x._monitor = Mock() + x.stop() + x._monitor.stop.assert_called_with() diff --git a/celery/tests/worker/test_autoscale.py b/celery/tests/worker/test_autoscale.py new file mode 100644 index 0000000..45ea488 --- /dev/null +++ b/celery/tests/worker/test_autoscale.py @@ -0,0 +1,198 @@ +from __future__ import absolute_import + +import sys + +from celery.concurrency.base import BasePool +from celery.five import monotonic +from celery.worker import state +from celery.worker import autoscale +from celery.tests.case import AppCase, Mock, patch, sleepdeprived + + +class Object(object): + pass + + +class MockPool(BasePool): + shrink_raises_exception = False + shrink_raises_ValueError = False + + def __init__(self, *args, **kwargs): + super(MockPool, self).__init__(*args, **kwargs) + self._pool = Object() + self._pool._processes = self.limit + + def grow(self, n=1): + 
self._pool._processes += n + + def shrink(self, n=1): + if self.shrink_raises_exception: + raise KeyError('foo') + if self.shrink_raises_ValueError: + raise ValueError('foo') + self._pool._processes -= n + + @property + def num_processes(self): + return self._pool._processes + + +class test_WorkerComponent(AppCase): + + def test_register_with_event_loop(self): + parent = Mock(name='parent') + parent.autoscale = True + parent.consumer.on_task_message = set() + w = autoscale.WorkerComponent(parent) + self.assertIsNone(parent.autoscaler) + self.assertTrue(w.enabled) + + hub = Mock(name='hub') + w.create(parent) + w.register_with_event_loop(parent, hub) + self.assertIn( + parent.autoscaler.maybe_scale, + parent.consumer.on_task_message, + ) + hub.call_repeatedly.assert_called_with( + parent.autoscaler.keepalive, parent.autoscaler.maybe_scale, + ) + + parent.hub = hub + hub.on_init = [] + w.instantiate = Mock() + w.register_with_event_loop(parent, Mock(name='loop')) + self.assertTrue(parent.consumer.on_task_message) + + +class test_Autoscaler(AppCase): + + def setup(self): + self.pool = MockPool(3) + + def test_stop(self): + + class Scaler(autoscale.Autoscaler): + alive = True + joined = False + + def is_alive(self): + return self.alive + + def join(self, timeout=None): + self.joined = True + + worker = Mock(name='worker') + x = Scaler(self.pool, 10, 3, worker=worker) + x._is_stopped.set() + x.stop() + self.assertTrue(x.joined) + x.joined = False + x.alive = False + x.stop() + self.assertFalse(x.joined) + + @sleepdeprived(autoscale) + def test_body(self): + worker = Mock(name='worker') + x = autoscale.Autoscaler(self.pool, 10, 3, worker=worker) + x.body() + self.assertEqual(x.pool.num_processes, 3) + for i in range(20): + state.reserved_requests.add(i) + x.body() + x.body() + self.assertEqual(x.pool.num_processes, 10) + self.assertTrue(worker.consumer._update_prefetch_count.called) + state.reserved_requests.clear() + x.body() + self.assertEqual(x.pool.num_processes, 10) + x._last_action = monotonic() - 10000 + x.body() + self.assertEqual(x.pool.num_processes, 3) + self.assertTrue(worker.consumer._update_prefetch_count.called) + + def test_run(self): + + class Scaler(autoscale.Autoscaler): + scale_called = False + + def body(self): + self.scale_called = True + self._is_shutdown.set() + + worker = Mock(name='worker') + x = Scaler(self.pool, 10, 3, worker=worker) + x.run() + self.assertTrue(x._is_shutdown.isSet()) + self.assertTrue(x._is_stopped.isSet()) + self.assertTrue(x.scale_called) + + def test_shrink_raises_exception(self): + worker = Mock(name='worker') + x = autoscale.Autoscaler(self.pool, 10, 3, worker=worker) + x.scale_up(3) + x._last_action = monotonic() - 10000 + x.pool.shrink_raises_exception = True + x.scale_down(1) + + @patch('celery.worker.autoscale.debug') + def test_shrink_raises_ValueError(self, debug): + worker = Mock(name='worker') + x = autoscale.Autoscaler(self.pool, 10, 3, worker=worker) + x.scale_up(3) + x._last_action = monotonic() - 10000 + x.pool.shrink_raises_ValueError = True + x.scale_down(1) + self.assertTrue(debug.call_count) + + def test_update_and_force(self): + worker = Mock(name='worker') + x = autoscale.Autoscaler(self.pool, 10, 3, worker=worker) + self.assertEqual(x.processes, 3) + x.force_scale_up(5) + self.assertEqual(x.processes, 8) + x.update(5, None) + self.assertEqual(x.processes, 5) + x.force_scale_down(3) + self.assertEqual(x.processes, 2) + x.update(3, None) + self.assertEqual(x.processes, 3) + x.force_scale_down(1000) + 
self.assertEqual(x.min_concurrency, 0) + self.assertEqual(x.processes, 0) + x.force_scale_up(1000) + x.min_concurrency = 1 + x.force_scale_down(1) + + x.update(max=300, min=10) + x.update(max=300, min=2) + x.update(max=None, min=None) + + def test_info(self): + worker = Mock(name='worker') + x = autoscale.Autoscaler(self.pool, 10, 3, worker=worker) + info = x.info() + self.assertEqual(info['max'], 10) + self.assertEqual(info['min'], 3) + self.assertEqual(info['current'], 3) + + @patch('os._exit') + def test_thread_crash(self, _exit): + + class _Autoscaler(autoscale.Autoscaler): + + def body(self): + self._is_shutdown.set() + raise OSError('foo') + worker = Mock(name='worker') + x = _Autoscaler(self.pool, 10, 3, worker=worker) + + stderr = Mock() + p, sys.stderr = sys.stderr, stderr + try: + x.run() + finally: + sys.stderr = p + _exit.assert_called_with(1) + self.assertTrue(stderr.write.call_count) diff --git a/celery/tests/worker/test_bootsteps.py b/celery/tests/worker/test_bootsteps.py new file mode 100644 index 0000000..522d263 --- /dev/null +++ b/celery/tests/worker/test_bootsteps.py @@ -0,0 +1,338 @@ +from __future__ import absolute_import + +from celery import bootsteps + +from celery.tests.case import AppCase, Mock, patch + + +class test_StepFormatter(AppCase): + + def test_get_prefix(self): + f = bootsteps.StepFormatter() + s = Mock() + s.last = True + self.assertEqual(f._get_prefix(s), f.blueprint_prefix) + + s2 = Mock() + s2.last = False + s2.conditional = True + self.assertEqual(f._get_prefix(s2), f.conditional_prefix) + + s3 = Mock() + s3.last = s3.conditional = False + self.assertEqual(f._get_prefix(s3), '') + + def test_node(self): + f = bootsteps.StepFormatter() + f.draw_node = Mock() + step = Mock() + step.last = False + f.node(step, x=3) + f.draw_node.assert_called_with(step, f.node_scheme, {'x': 3}) + + step.last = True + f.node(step, x=3) + f.draw_node.assert_called_with(step, f.blueprint_scheme, {'x': 3}) + + def test_edge(self): + f = bootsteps.StepFormatter() + f.draw_edge = Mock() + a, b = Mock(), Mock() + a.last = True + f.edge(a, b, x=6) + f.draw_edge.assert_called_with(a, b, f.edge_scheme, { + 'x': 6, 'arrowhead': 'none', 'color': 'darkseagreen3', + }) + + a.last = False + f.edge(a, b, x=6) + f.draw_edge.assert_called_with(a, b, f.edge_scheme, { + 'x': 6, + }) + + +class test_Step(AppCase): + + class Def(bootsteps.StartStopStep): + name = 'test_Step.Def' + + def setup(self): + self.steps = [] + + def test_blueprint_name(self, bp='test_blueprint_name'): + + class X(bootsteps.Step): + blueprint = bp + name = 'X' + self.assertEqual(X.name, 'X') + + class Y(bootsteps.Step): + name = '%s.Y' % bp + self.assertEqual(Y.name, '%s.Y' % bp) + + def test_init(self): + self.assertTrue(self.Def(self)) + + def test_create(self): + self.Def(self).create(self) + + def test_include_if(self): + x = self.Def(self) + x.enabled = True + self.assertTrue(x.include_if(self)) + + x.enabled = False + self.assertFalse(x.include_if(self)) + + def test_instantiate(self): + self.assertIsInstance(self.Def(self).instantiate(self.Def, self), + self.Def) + + def test_include_when_enabled(self): + x = self.Def(self) + x.create = Mock() + x.create.return_value = 'George' + self.assertTrue(x.include(self)) + + self.assertEqual(x.obj, 'George') + x.create.assert_called_with(self) + + def test_include_when_disabled(self): + x = self.Def(self) + x.enabled = False + x.create = Mock() + + self.assertFalse(x.include(self)) + self.assertFalse(x.create.call_count) + + def test_repr(self): + x = self.Def(self) 
+ self.assertTrue(repr(x)) + + +class test_ConsumerStep(AppCase): + + def test_interface(self): + step = bootsteps.ConsumerStep(self) + with self.assertRaises(NotImplementedError): + step.get_consumers(self) + + def test_start_stop_shutdown(self): + consumer = Mock() + self.connection = Mock() + + class Step(bootsteps.ConsumerStep): + + def get_consumers(self, c): + return [consumer] + + step = Step(self) + self.assertEqual(step.get_consumers(self), [consumer]) + + step.start(self) + consumer.consume.assert_called_with() + step.stop(self) + consumer.cancel.assert_called_with() + + step.shutdown(self) + consumer.channel.close.assert_called_with() + + def test_start_no_consumers(self): + self.connection = Mock() + + class Step(bootsteps.ConsumerStep): + + def get_consumers(self, c): + return () + + step = Step(self) + step.start(self) + + +class test_StartStopStep(AppCase): + + class Def(bootsteps.StartStopStep): + name = 'test_StartStopStep.Def' + + def setup(self): + self.steps = [] + + def test_start__stop(self): + x = self.Def(self) + x.create = Mock() + + # include creates the underlying object and sets + # its x.obj attribute to it, as well as appending + # it to the parent.steps list. + x.include(self) + self.assertTrue(self.steps) + self.assertIs(self.steps[0], x) + + x.start(self) + x.obj.start.assert_called_with() + + x.stop(self) + x.obj.stop.assert_called_with() + + x.obj = None + self.assertIsNone(x.start(self)) + + def test_include_when_disabled(self): + x = self.Def(self) + x.enabled = False + x.include(self) + self.assertFalse(self.steps) + + def test_terminate(self): + x = self.Def(self) + x.create = Mock() + + x.include(self) + delattr(x.obj, 'terminate') + x.terminate(self) + x.obj.stop.assert_called_with() + + +class test_Blueprint(AppCase): + + class Blueprint(bootsteps.Blueprint): + name = 'test_Blueprint' + + def test_steps_added_to_unclaimed(self): + + class tnA(bootsteps.Step): + name = 'test_Blueprint.A' + + class tnB(bootsteps.Step): + name = 'test_Blueprint.B' + + class xxA(bootsteps.Step): + name = 'xx.A' + + class Blueprint(self.Blueprint): + default_steps = [tnA, tnB] + blueprint = Blueprint(app=self.app) + + self.assertIn(tnA, blueprint._all_steps()) + self.assertIn(tnB, blueprint._all_steps()) + self.assertNotIn(xxA, blueprint._all_steps()) + + def test_init(self): + blueprint = self.Blueprint(app=self.app) + self.assertIs(blueprint.app, self.app) + self.assertEqual(blueprint.name, 'test_Blueprint') + + def test_close__on_close_is_None(self): + blueprint = self.Blueprint(app=self.app) + blueprint.on_close = None + blueprint.send_all = Mock() + blueprint.close(1) + blueprint.send_all.assert_called_with( + 1, 'close', 'closing', reverse=False, + ) + + def test_send_all_with_None_steps(self): + parent = Mock() + blueprint = self.Blueprint(app=self.app) + parent.steps = [None, None, None] + blueprint.send_all(parent, 'close', 'Closing', reverse=False) + + def test_join_raises_IGNORE_ERRORS(self): + prev, bootsteps.IGNORE_ERRORS = bootsteps.IGNORE_ERRORS, (KeyError, ) + try: + blueprint = self.Blueprint(app=self.app) + blueprint.shutdown_complete = Mock() + blueprint.shutdown_complete.wait.side_effect = KeyError('luke') + blueprint.join(timeout=10) + blueprint.shutdown_complete.wait.assert_called_with(timeout=10) + finally: + bootsteps.IGNORE_ERRORS = prev + + def test_connect_with(self): + + class b1s1(bootsteps.Step): + pass + + class b1s2(bootsteps.Step): + last = True + + class b2s1(bootsteps.Step): + pass + + class b2s2(bootsteps.Step): + last = True + + b1 = 
self.Blueprint([b1s1, b1s2], app=self.app) + b2 = self.Blueprint([b2s1, b2s2], app=self.app) + b1.apply(Mock()) + b2.apply(Mock()) + b1.connect_with(b2) + + self.assertIn(b1s1, b1.graph) + self.assertIn(b2s1, b1.graph) + self.assertIn(b2s2, b1.graph) + + self.assertTrue(repr(b1s1)) + self.assertTrue(str(b1s1)) + + def test_topsort_raises_KeyError(self): + + class Step(bootsteps.Step): + requires = ('xyxxx.fsdasewe.Unknown', ) + + b = self.Blueprint([Step], app=self.app) + b.steps = b.claim_steps() + with self.assertRaises(ImportError): + b._finalize_steps(b.steps) + Step.requires = () + + b.steps = b.claim_steps() + b._finalize_steps(b.steps) + + with patch('celery.bootsteps.DependencyGraph') as Dep: + g = Dep.return_value = Mock() + g.topsort.side_effect = KeyError('foo') + with self.assertRaises(KeyError): + b._finalize_steps(b.steps) + + def test_apply(self): + + class MyBlueprint(bootsteps.Blueprint): + name = 'test_apply' + + def modules(self): + return ['A', 'B'] + + class B(bootsteps.Step): + name = 'test_apply.B' + + class C(bootsteps.Step): + name = 'test_apply.C' + requires = [B] + + class A(bootsteps.Step): + name = 'test_apply.A' + requires = [C] + + class D(bootsteps.Step): + name = 'test_apply.D' + last = True + + x = MyBlueprint([A, D], app=self.app) + x.apply(self) + + self.assertIsInstance(x.order[0], B) + self.assertIsInstance(x.order[1], C) + self.assertIsInstance(x.order[2], A) + self.assertIsInstance(x.order[3], D) + self.assertIn(A, x.types) + self.assertIs(x[A.name], x.order[2]) + + def test_find_last_but_no_steps(self): + + class MyBlueprint(bootsteps.Blueprint): + name = 'qwejwioqjewoqiej' + + x = MyBlueprint(app=self.app) + x.apply(self) + self.assertIsNone(x._find_last()) diff --git a/celery/tests/worker/test_components.py b/celery/tests/worker/test_components.py new file mode 100644 index 0000000..b39865d --- /dev/null +++ b/celery/tests/worker/test_components.py @@ -0,0 +1,38 @@ +from __future__ import absolute_import + +# some of these are tested in test_worker, so I've only written tests +# here to complete coverage. 
Should move everything to this module at some
+# point [-ask]
+
+from celery.worker.components import (
+    Queues,
+    Pool,
+)
+
+from celery.tests.case import AppCase, Mock
+
+
+class test_Queues(AppCase):
+
+    def test_create_when_eventloop(self):
+        w = Mock()
+        w.use_eventloop = w.pool_putlocks = w.pool_cls.uses_semaphore = True
+        q = Queues(w)
+        q.create(w)
+        self.assertIs(w.process_task, w._process_task_sem)
+
+
+class test_Pool(AppCase):
+
+    def test_close_terminate(self):
+        w = Mock()
+        comp = Pool(w)
+        pool = w.pool = Mock()
+        comp.close(w)
+        pool.close.assert_called_with()
+        comp.terminate(w)
+        pool.terminate.assert_called_with()
+
+        w.pool = None
+        comp.close(w)
+        comp.terminate(w)
diff --git a/celery/tests/worker/test_consumer.py b/celery/tests/worker/test_consumer.py
new file mode 100644
index 0000000..b9962a4
--- /dev/null
+++ b/celery/tests/worker/test_consumer.py
@@ -0,0 +1,490 @@
+from __future__ import absolute_import
+
+import errno
+import socket
+
+from billiard.exceptions import RestartFreqExceeded
+
+from celery.datastructures import LimitedSet
+from celery.worker import state as worker_state
+from celery.worker.consumer import (
+    Consumer,
+    Heart,
+    Tasks,
+    Agent,
+    Mingle,
+    Gossip,
+    dump_body,
+    CLOSE,
+)
+
+from celery.tests.case import AppCase, ContextMock, Mock, SkipTest, call, patch
+
+
+class test_Consumer(AppCase):
+
+    def get_consumer(self, no_hub=False, **kwargs):
+        consumer = Consumer(
+            on_task_request=Mock(),
+            init_callback=Mock(),
+            pool=Mock(),
+            app=self.app,
+            timer=Mock(),
+            controller=Mock(),
+            hub=None if no_hub else Mock(),
+            **kwargs
+        )
+        consumer.blueprint = Mock()
+        consumer._restart_state = Mock()
+        consumer.connection = _amqp_connection()
+        consumer.connection_errors = (socket.error, OSError, )
+        return consumer
+
+    def test_taskbuckets_defaultdict(self):
+        c = self.get_consumer()
+        self.assertIsNone(c.task_buckets['fooxasdwx.wewe'])
+
+    def test_dump_body_buffer(self):
+        msg = Mock()
+        msg.body = 'str'
+        try:
+            buf = buffer(msg.body)
+        except NameError:
+            raise SkipTest('buffer type not available')
+        self.assertTrue(dump_body(msg, buf))
+
+    def test_sets_heartbeat(self):
+        c = self.get_consumer(amqheartbeat=10)
+        self.assertEqual(c.amqheartbeat, 10)
+        self.app.conf.BROKER_HEARTBEAT = 20
+        c = self.get_consumer(amqheartbeat=None)
+        self.assertEqual(c.amqheartbeat, 20)
+
+    def test_gevent_bug_disables_connection_timeout(self):
+        with patch('celery.worker.consumer._detect_environment') as de:
+            de.return_value = 'gevent'
+            self.app.conf.BROKER_CONNECTION_TIMEOUT = 33.33
+            self.get_consumer()
+            self.assertIsNone(self.app.conf.BROKER_CONNECTION_TIMEOUT)
+
+    def test_limit_task(self):
+        c = self.get_consumer()
+
+        with patch('celery.worker.consumer.task_reserved') as reserved:
+            bucket = Mock()
+            request = Mock()
+            bucket.can_consume.return_value = True
+
+            c._limit_task(request, bucket, 3)
+            bucket.can_consume.assert_called_with(3)
+            reserved.assert_called_with(request)
+            c.on_task_request.assert_called_with(request)
+
+        with patch('celery.worker.consumer.task_reserved') as reserved:
+            bucket.can_consume.return_value = False
+            bucket.expected_time.return_value = 3.33
+            c._limit_task(request, bucket, 4)
+            bucket.can_consume.assert_called_with(4)
+            c.timer.call_after.assert_called_with(
+                3.33, c._limit_task, (request, bucket, 4),
+            )
+            bucket.expected_time.assert_called_with(4)
+            self.assertFalse(reserved.called)
+
+    def test_start_blueprint_raises_EMFILE(self):
+        c = self.get_consumer()
+        exc = c.blueprint.start.side_effect = OSError()
+
exc.errno = errno.EMFILE + + with self.assertRaises(OSError): + c.start() + + def test_max_restarts_exceeded(self): + c = self.get_consumer() + + def se(*args, **kwargs): + c.blueprint.state = CLOSE + raise RestartFreqExceeded() + c._restart_state.step.side_effect = se + c.blueprint.start.side_effect = socket.error() + + with patch('celery.worker.consumer.sleep') as sleep: + c.start() + sleep.assert_called_with(1) + + def _closer(self, c): + def se(*args, **kwargs): + c.blueprint.state = CLOSE + return se + + def test_collects_at_restart(self): + c = self.get_consumer() + c.connection.collect.side_effect = MemoryError() + c.blueprint.start.side_effect = socket.error() + c.blueprint.restart.side_effect = self._closer(c) + c.start() + c.connection.collect.assert_called_with() + + def test_register_with_event_loop(self): + c = self.get_consumer() + c.register_with_event_loop(Mock(name='loop')) + + def test_on_close_clears_semaphore_timer_and_reqs(self): + with patch('celery.worker.consumer.reserved_requests') as reserved: + c = self.get_consumer() + c.on_close() + c.controller.semaphore.clear.assert_called_with() + c.timer.clear.assert_called_with() + reserved.clear.assert_called_with() + c.pool.flush.assert_called_with() + + c.controller = None + c.timer = None + c.pool = None + c.on_close() + + def test_connect_error_handler(self): + self.app.connection = _amqp_connection() + conn = self.app.connection.return_value + c = self.get_consumer() + self.assertTrue(c.connect()) + self.assertTrue(conn.ensure_connection.called) + errback = conn.ensure_connection.call_args[0][0] + conn.alt = [(1, 2, 3)] + errback(Mock(), 0) + + +class test_Heart(AppCase): + + def test_start(self): + c = Mock() + c.timer = Mock() + c.event_dispatcher = Mock() + + with patch('celery.worker.heartbeat.Heart') as hcls: + h = Heart(c) + self.assertTrue(h.enabled) + self.assertEqual(h.heartbeat_interval, None) + self.assertIsNone(c.heart) + + h.start(c) + self.assertTrue(c.heart) + hcls.assert_called_with(c.timer, c.event_dispatcher, + h.heartbeat_interval) + c.heart.start.assert_called_with() + + def test_start_heartbeat_interval(self): + c = Mock() + c.timer = Mock() + c.event_dispatcher = Mock() + + with patch('celery.worker.heartbeat.Heart') as hcls: + h = Heart(c, False, 20) + self.assertTrue(h.enabled) + self.assertEqual(h.heartbeat_interval, 20) + self.assertIsNone(c.heart) + + h.start(c) + self.assertTrue(c.heart) + hcls.assert_called_with(c.timer, c.event_dispatcher, + h.heartbeat_interval) + c.heart.start.assert_called_with() + + +class test_Tasks(AppCase): + + def test_stop(self): + c = Mock() + tasks = Tasks(c) + self.assertIsNone(c.task_consumer) + self.assertIsNone(c.qos) + + c.task_consumer = Mock() + tasks.stop(c) + + def test_stop_already_stopped(self): + c = Mock() + tasks = Tasks(c) + tasks.stop(c) + + +class test_Agent(AppCase): + + def test_start(self): + c = Mock() + agent = Agent(c) + agent.instantiate = Mock() + agent.agent_cls = 'foo:Agent' + self.assertIsNotNone(agent.create(c)) + agent.instantiate.assert_called_with(agent.agent_cls, c.connection) + + +class test_Mingle(AppCase): + + def test_start_no_replies(self): + c = Mock() + c.app.connection = _amqp_connection() + mingle = Mingle(c) + I = c.app.control.inspect.return_value = Mock() + I.hello.return_value = {} + mingle.start(c) + + def test_start(self): + try: + c = Mock() + c.app.connection = _amqp_connection() + mingle = Mingle(c) + self.assertTrue(mingle.enabled) + + Aig = LimitedSet() + Big = LimitedSet() + Aig.add('Aig-1') + 
Aig.add('Aig-2') + Big.add('Big-1') + + I = c.app.control.inspect.return_value = Mock() + I.hello.return_value = { + 'A@example.com': { + 'clock': 312, + 'revoked': Aig._data, + }, + 'B@example.com': { + 'clock': 29, + 'revoked': Big._data, + }, + 'C@example.com': { + 'error': 'unknown method', + }, + } + + mingle.start(c) + I.hello.assert_called_with(c.hostname, worker_state.revoked._data) + c.app.clock.adjust.assert_has_calls([ + call(312), call(29), + ], any_order=True) + self.assertIn('Aig-1', worker_state.revoked) + self.assertIn('Aig-2', worker_state.revoked) + self.assertIn('Big-1', worker_state.revoked) + finally: + worker_state.revoked.clear() + + +def _amqp_connection(): + connection = ContextMock() + connection.return_value = ContextMock() + connection.return_value.transport.driver_type = 'amqp' + return connection + + +class test_Gossip(AppCase): + + def test_init(self): + c = self.Consumer() + c.app.connection = _amqp_connection() + g = Gossip(c) + self.assertTrue(g.enabled) + self.assertIs(c.gossip, g) + + def test_election(self): + c = self.Consumer() + c.app.connection = _amqp_connection() + g = Gossip(c) + g.start(c) + g.election('id', 'topic', 'action') + self.assertListEqual(g.consensus_replies['id'], []) + g.dispatcher.send.assert_called_with( + 'worker-elect', id='id', topic='topic', cver=1, action='action', + ) + + def test_call_task(self): + c = self.Consumer() + c.app.connection = _amqp_connection() + g = Gossip(c) + g.start(c) + + with patch('celery.worker.consumer.signature') as signature: + sig = signature.return_value = Mock() + task = Mock() + g.call_task(task) + signature.assert_called_with(task, app=c.app) + sig.apply_async.assert_called_with() + + sig.apply_async.side_effect = MemoryError() + with patch('celery.worker.consumer.error') as error: + g.call_task(task) + self.assertTrue(error.called) + + def Event(self, id='id', clock=312, + hostname='foo@example.com', pid=4312, + topic='topic', action='action', cver=1): + return { + 'id': id, + 'clock': clock, + 'hostname': hostname, + 'pid': pid, + 'topic': topic, + 'action': action, + 'cver': cver, + } + + def test_on_elect(self): + c = self.Consumer() + c.app.connection = _amqp_connection() + g = Gossip(c) + g.start(c) + + event = self.Event('id1') + g.on_elect(event) + in_heap = g.consensus_requests['id1'] + self.assertTrue(in_heap) + g.dispatcher.send.assert_called_with('worker-elect-ack', id='id1') + + event.pop('clock') + with patch('celery.worker.consumer.error') as error: + g.on_elect(event) + self.assertTrue(error.called) + + def Consumer(self, hostname='foo@x.com', pid=4312): + c = Mock() + c.app.connection = _amqp_connection() + c.hostname = hostname + c.pid = pid + return c + + def setup_election(self, g, c): + g.start(c) + g.clock = self.app.clock + self.assertNotIn('idx', g.consensus_replies) + self.assertIsNone(g.on_elect_ack({'id': 'idx'})) + + g.state.alive_workers.return_value = [ + 'foo@x.com', 'bar@x.com', 'baz@x.com', + ] + g.consensus_replies['id1'] = [] + g.consensus_requests['id1'] = [] + e1 = self.Event('id1', 1, 'foo@x.com') + e2 = self.Event('id1', 2, 'bar@x.com') + e3 = self.Event('id1', 3, 'baz@x.com') + g.on_elect(e1) + g.on_elect(e2) + g.on_elect(e3) + self.assertEqual(len(g.consensus_requests['id1']), 3) + + with patch('celery.worker.consumer.info'): + g.on_elect_ack(e1) + self.assertEqual(len(g.consensus_replies['id1']), 1) + g.on_elect_ack(e2) + self.assertEqual(len(g.consensus_replies['id1']), 2) + g.on_elect_ack(e3) + with self.assertRaises(KeyError): + 
g.consensus_replies['id1'] + + def test_on_elect_ack_win(self): + c = self.Consumer(hostname='foo@x.com') # I will win + g = Gossip(c) + handler = g.election_handlers['topic'] = Mock() + self.setup_election(g, c) + handler.assert_called_with('action') + + def test_on_elect_ack_lose(self): + c = self.Consumer(hostname='bar@x.com') # I will lose + c.app.connection = _amqp_connection() + g = Gossip(c) + handler = g.election_handlers['topic'] = Mock() + self.setup_election(g, c) + self.assertFalse(handler.called) + + def test_on_elect_ack_win_but_no_action(self): + c = self.Consumer(hostname='foo@x.com') # I will win + g = Gossip(c) + g.election_handlers = {} + with patch('celery.worker.consumer.error') as error: + self.setup_election(g, c) + self.assertTrue(error.called) + + def test_on_node_join(self): + c = self.Consumer() + g = Gossip(c) + with patch('celery.worker.consumer.debug') as debug: + g.on_node_join(c) + debug.assert_called_with('%s joined the party', 'foo@x.com') + + def test_on_node_leave(self): + c = self.Consumer() + g = Gossip(c) + with patch('celery.worker.consumer.debug') as debug: + g.on_node_leave(c) + debug.assert_called_with('%s left', 'foo@x.com') + + def test_on_node_lost(self): + c = self.Consumer() + g = Gossip(c) + with patch('celery.worker.consumer.info') as info: + g.on_node_lost(c) + info.assert_called_with('missed heartbeat from %s', 'foo@x.com') + + def test_register_timer(self): + c = self.Consumer() + g = Gossip(c) + g.register_timer() + c.timer.call_repeatedly.assert_called_with(g.interval, g.periodic) + tref = g._tref + g.register_timer() + tref.cancel.assert_called_with() + + def test_periodic(self): + c = self.Consumer() + g = Gossip(c) + g.on_node_lost = Mock() + state = g.state = Mock() + worker = Mock() + state.workers = {'foo': worker} + worker.alive = True + worker.hostname = 'foo' + g.periodic() + + worker.alive = False + g.periodic() + g.on_node_lost.assert_called_with(worker) + with self.assertRaises(KeyError): + state.workers['foo'] + + def test_on_message(self): + c = self.Consumer() + g = Gossip(c) + self.assertTrue(g.enabled) + prepare = Mock() + prepare.return_value = 'worker-online', {} + c.app.events.State.assert_called_with( + on_node_join=g.on_node_join, + on_node_leave=g.on_node_leave, + max_tasks_in_memory=1, + ) + g.update_state = Mock() + worker = Mock() + g.on_node_join = Mock() + g.on_node_leave = Mock() + g.update_state.return_value = worker, 1 + message = Mock() + message.delivery_info = {'routing_key': 'worker-online'} + message.headers = {'hostname': 'other'} + + handler = g.event_handlers['worker-online'] = Mock() + g.on_message(prepare, message) + handler.assert_called_with(message.payload) + g.event_handlers = {} + + g.on_message(prepare, message) + + message.delivery_info = {'routing_key': 'worker-offline'} + prepare.return_value = 'worker-offline', {} + g.on_message(prepare, message) + + message.delivery_info = {'routing_key': 'worker-baz'} + prepare.return_value = 'worker-baz', {} + g.update_state.return_value = worker, 0 + g.on_message(prepare, message) + + message.headers = {'hostname': g.hostname} + g.on_message(prepare, message) + g.clock.forward.assert_called_with() diff --git a/celery/tests/worker/test_control.py b/celery/tests/worker/test_control.py new file mode 100644 index 0000000..f2a17df --- /dev/null +++ b/celery/tests/worker/test_control.py @@ -0,0 +1,598 @@ +from __future__ import absolute_import + +import sys +import socket + +from collections import defaultdict +from datetime import datetime, timedelta + 
+from kombu import pidbox + +from celery.datastructures import AttributeDict +from celery.five import Queue as FastQueue +from celery.utils import uuid +from celery.utils.timer2 import Timer +from celery.worker import WorkController as _WC +from celery.worker import consumer +from celery.worker import control +from celery.worker import state as worker_state +from celery.worker.job import Request +from celery.worker.state import revoked +from celery.worker.control import Panel +from celery.worker.pidbox import Pidbox, gPidbox + +from celery.tests.case import AppCase, Mock, call, patch + +hostname = socket.gethostname() + + +class WorkController(object): + autoscaler = None + + def stats(self): + return {'total': worker_state.total_count} + + +class Consumer(consumer.Consumer): + + def __init__(self, app): + self.app = app + self.buffer = FastQueue() + self.handle_task = self.buffer.put + self.timer = Timer() + self.event_dispatcher = Mock() + self.controller = WorkController() + self.task_consumer = Mock() + self.prefetch_multiplier = 1 + self.initial_prefetch_count = 1 + + from celery.concurrency.base import BasePool + self.pool = BasePool(10) + self.task_buckets = defaultdict(lambda: None) + + +class test_Pidbox(AppCase): + + def test_shutdown(self): + with patch('celery.worker.pidbox.ignore_errors') as eig: + parent = Mock() + pbox = Pidbox(parent) + pbox._close_channel = Mock() + self.assertIs(pbox.c, parent) + pconsumer = pbox.consumer = Mock() + cancel = pconsumer.cancel + pbox.shutdown(parent) + eig.assert_called_with(parent, cancel) + pbox._close_channel.assert_called_with(parent) + + +class test_Pidbox_green(AppCase): + + def test_stop(self): + parent = Mock() + g = gPidbox(parent) + stopped = g._node_stopped = Mock() + shutdown = g._node_shutdown = Mock() + close_chan = g._close_channel = Mock() + + g.stop(parent) + shutdown.set.assert_called_with() + stopped.wait.assert_called_with() + close_chan.assert_called_with(parent) + self.assertIsNone(g._node_stopped) + self.assertIsNone(g._node_shutdown) + + close_chan.reset() + g.stop(parent) + close_chan.assert_called_with(parent) + + def test_resets(self): + parent = Mock() + g = gPidbox(parent) + g._resets = 100 + g.reset() + self.assertEqual(g._resets, 101) + + def test_loop(self): + parent = Mock() + conn = parent.connect.return_value = self.app.connection() + drain = conn.drain_events = Mock() + g = gPidbox(parent) + parent.connection = Mock() + do_reset = g._do_reset = Mock() + + call_count = [0] + + def se(*args, **kwargs): + if call_count[0] > 2: + g._node_shutdown.set() + g.reset() + call_count[0] += 1 + drain.side_effect = se + g.loop(parent) + + self.assertEqual(do_reset.call_count, 4) + + +class test_ControlPanel(AppCase): + + def setup(self): + self.panel = self.create_panel(consumer=Consumer(self.app)) + + @self.app.task(name='c.unittest.mytask', rate_limit=200, shared=False) + def mytask(): + pass + self.mytask = mytask + + def create_state(self, **kwargs): + kwargs.setdefault('app', self.app) + kwargs.setdefault('hostname', hostname) + return AttributeDict(kwargs) + + def create_panel(self, **kwargs): + return self.app.control.mailbox.Node(hostname=hostname, + state=self.create_state(**kwargs), + handlers=Panel.data) + + def test_enable_events(self): + consumer = Consumer(self.app) + panel = self.create_panel(consumer=consumer) + evd = consumer.event_dispatcher + evd.groups = set() + panel.handle('enable_events') + self.assertIn('task', evd.groups) + evd.groups = set(['task']) + self.assertIn('already enabled', 
panel.handle('enable_events')['ok']) + + def test_disable_events(self): + consumer = Consumer(self.app) + panel = self.create_panel(consumer=consumer) + evd = consumer.event_dispatcher + evd.enabled = True + evd.groups = set(['task']) + panel.handle('disable_events') + self.assertNotIn('task', evd.groups) + self.assertIn('already disabled', panel.handle('disable_events')['ok']) + + def test_clock(self): + consumer = Consumer(self.app) + panel = self.create_panel(consumer=consumer) + panel.state.app.clock.value = 313 + x = panel.handle('clock') + self.assertEqual(x['clock'], 313) + + def test_hello(self): + consumer = Consumer(self.app) + panel = self.create_panel(consumer=consumer) + panel.state.app.clock.value = 313 + worker_state.revoked.add('revoked1') + try: + x = panel.handle('hello', {'from_node': 'george@vandelay.com'}) + self.assertIn('revoked1', x['revoked']) + self.assertEqual(x['clock'], 314) # incremented + finally: + worker_state.revoked.discard('revoked1') + + def test_conf(self): + return + consumer = Consumer(self.app) + panel = self.create_panel(consumer=consumer) + self.app.conf.SOME_KEY6 = 'hello world' + x = panel.handle('dump_conf') + self.assertIn('SOME_KEY6', x) + + def test_election(self): + consumer = Consumer(self.app) + panel = self.create_panel(consumer=consumer) + consumer.gossip = Mock() + panel.handle( + 'election', {'id': 'id', 'topic': 'topic', 'action': 'action'}, + ) + consumer.gossip.election.assert_called_with('id', 'topic', 'action') + + def test_heartbeat(self): + consumer = Consumer(self.app) + panel = self.create_panel(consumer=consumer) + consumer.event_dispatcher.enabled = True + panel.handle('heartbeat') + self.assertIn(('worker-heartbeat', ), + consumer.event_dispatcher.send.call_args) + + def test_time_limit(self): + panel = self.create_panel(consumer=Mock()) + r = panel.handle('time_limit', arguments=dict( + task_name=self.mytask.name, hard=30, soft=10)) + self.assertEqual( + (self.mytask.time_limit, self.mytask.soft_time_limit), + (30, 10), + ) + self.assertIn('ok', r) + r = panel.handle('time_limit', arguments=dict( + task_name=self.mytask.name, hard=None, soft=None)) + self.assertEqual( + (self.mytask.time_limit, self.mytask.soft_time_limit), + (None, None), + ) + self.assertIn('ok', r) + + r = panel.handle('time_limit', arguments=dict( + task_name='248e8afya9s8dh921eh928', hard=30)) + self.assertIn('error', r) + + def test_active_queues(self): + import kombu + + x = kombu.Consumer(self.app.connection(), + [kombu.Queue('foo', kombu.Exchange('foo'), 'foo'), + kombu.Queue('bar', kombu.Exchange('bar'), 'bar')], + auto_declare=False) + consumer = Mock() + consumer.task_consumer = x + panel = self.create_panel(consumer=consumer) + r = panel.handle('active_queues') + self.assertListEqual(list(sorted(q['name'] for q in r)), + ['bar', 'foo']) + + def test_dump_tasks(self): + info = '\n'.join(self.panel.handle('dump_tasks')) + self.assertIn('mytask', info) + self.assertIn('rate_limit=200', info) + + def test_stats(self): + prev_count, worker_state.total_count = worker_state.total_count, 100 + try: + self.assertDictContainsSubset({'total': 100}, + self.panel.handle('stats')) + finally: + worker_state.total_count = prev_count + + def test_report(self): + self.panel.handle('report') + + def test_active(self): + r = Request({ + 'task': self.mytask.name, + 'id': 'do re mi', + 'args': (), + 'kwargs': {}, + }, app=self.app) + worker_state.active_requests.add(r) + try: + self.assertTrue(self.panel.handle('dump_active')) + finally: + 
worker_state.active_requests.discard(r) + + def test_pool_grow(self): + + class MockPool(object): + + def __init__(self, size=1): + self.size = size + + def grow(self, n=1): + self.size += n + + def shrink(self, n=1): + self.size -= n + + @property + def num_processes(self): + return self.size + + consumer = Consumer(self.app) + consumer.prefetch_multiplier = 8 + consumer.qos = Mock(name='qos') + consumer.pool = MockPool(1) + panel = self.create_panel(consumer=consumer) + + panel.handle('pool_grow') + self.assertEqual(consumer.pool.size, 2) + consumer.qos.increment_eventually.assert_called_with(8) + self.assertEqual(consumer.initial_prefetch_count, 16) + panel.handle('pool_shrink') + self.assertEqual(consumer.pool.size, 1) + consumer.qos.decrement_eventually.assert_called_with(8) + self.assertEqual(consumer.initial_prefetch_count, 8) + + panel.state.consumer = Mock() + panel.state.consumer.controller = Mock() + sc = panel.state.consumer.controller.autoscaler = Mock() + panel.handle('pool_grow') + self.assertTrue(sc.force_scale_up.called) + panel.handle('pool_shrink') + self.assertTrue(sc.force_scale_down.called) + + def test_add__cancel_consumer(self): + + class MockConsumer(object): + queues = [] + cancelled = [] + consuming = False + + def add_queue(self, queue): + self.queues.append(queue.name) + + def consume(self): + self.consuming = True + + def cancel_by_queue(self, queue): + self.cancelled.append(queue) + + def consuming_from(self, queue): + return queue in self.queues + + consumer = Consumer(self.app) + consumer.task_consumer = MockConsumer() + panel = self.create_panel(consumer=consumer) + + panel.handle('add_consumer', {'queue': 'MyQueue'}) + self.assertIn('MyQueue', consumer.task_consumer.queues) + self.assertTrue(consumer.task_consumer.consuming) + panel.handle('add_consumer', {'queue': 'MyQueue'}) + panel.handle('cancel_consumer', {'queue': 'MyQueue'}) + self.assertIn('MyQueue', consumer.task_consumer.cancelled) + + def test_revoked(self): + worker_state.revoked.clear() + worker_state.revoked.add('a1') + worker_state.revoked.add('a2') + + try: + self.assertEqual(sorted(self.panel.handle('dump_revoked')), + ['a1', 'a2']) + finally: + worker_state.revoked.clear() + + def test_dump_schedule(self): + consumer = Consumer(self.app) + panel = self.create_panel(consumer=consumer) + self.assertFalse(panel.handle('dump_schedule')) + r = Request({ + 'task': self.mytask.name, + 'id': 'CAFEBABE', + 'args': (), + 'kwargs': {}, + }, app=self.app) + consumer.timer.schedule.enter_at( + consumer.timer.Entry(lambda x: x, (r, )), + datetime.now() + timedelta(seconds=10)) + consumer.timer.schedule.enter_at( + consumer.timer.Entry(lambda x: x, (object(), )), + datetime.now() + timedelta(seconds=10)) + self.assertTrue(panel.handle('dump_schedule')) + + def test_dump_reserved(self): + consumer = Consumer(self.app) + worker_state.reserved_requests.add(Request({ + 'task': self.mytask.name, + 'id': uuid(), + 'args': (2, 2), + 'kwargs': {}, + }, app=self.app)) + try: + panel = self.create_panel(consumer=consumer) + response = panel.handle('dump_reserved', {'safe': True}) + self.assertDictContainsSubset( + {'name': self.mytask.name, + 'args': (2, 2), + 'kwargs': {}, + 'hostname': socket.gethostname()}, + response[0], + ) + worker_state.reserved_requests.clear() + self.assertFalse(panel.handle('dump_reserved')) + finally: + worker_state.reserved_requests.clear() + + def test_rate_limit_invalid_rate_limit_string(self): + e = self.panel.handle('rate_limit', arguments=dict( + task_name='tasks.add', 
rate_limit='x1240301#%!')) + self.assertIn('Invalid rate limit string', e.get('error')) + + def test_rate_limit(self): + + class xConsumer(object): + reset = False + + def reset_rate_limits(self): + self.reset = True + + consumer = xConsumer() + panel = self.create_panel(app=self.app, consumer=consumer) + + task = self.app.tasks[self.mytask.name] + panel.handle('rate_limit', arguments=dict(task_name=task.name, + rate_limit='100/m')) + self.assertEqual(task.rate_limit, '100/m') + self.assertTrue(consumer.reset) + consumer.reset = False + panel.handle('rate_limit', arguments=dict(task_name=task.name, + rate_limit=0)) + self.assertEqual(task.rate_limit, 0) + self.assertTrue(consumer.reset) + + def test_rate_limit_nonexistant_task(self): + self.panel.handle('rate_limit', arguments={ + 'task_name': 'xxxx.does.not.exist', + 'rate_limit': '1000/s'}) + + def test_unexposed_command(self): + with self.assertRaises(KeyError): + self.panel.handle('foo', arguments={}) + + def test_revoke_with_name(self): + tid = uuid() + m = {'method': 'revoke', + 'destination': hostname, + 'arguments': {'task_id': tid, + 'task_name': self.mytask.name}} + self.panel.handle_message(m, None) + self.assertIn(tid, revoked) + + def test_revoke_with_name_not_in_registry(self): + tid = uuid() + m = {'method': 'revoke', + 'destination': hostname, + 'arguments': {'task_id': tid, + 'task_name': 'xxxxxxxxx33333333388888'}} + self.panel.handle_message(m, None) + self.assertIn(tid, revoked) + + def test_revoke(self): + tid = uuid() + m = {'method': 'revoke', + 'destination': hostname, + 'arguments': {'task_id': tid}} + self.panel.handle_message(m, None) + self.assertIn(tid, revoked) + + m = {'method': 'revoke', + 'destination': 'does.not.exist', + 'arguments': {'task_id': tid + 'xxx'}} + self.panel.handle_message(m, None) + self.assertNotIn(tid + 'xxx', revoked) + + def test_revoke_terminate(self): + request = Mock() + request.id = tid = uuid() + worker_state.reserved_requests.add(request) + try: + r = control.revoke(Mock(), tid, terminate=True) + self.assertIn(tid, revoked) + self.assertTrue(request.terminate.call_count) + self.assertIn('terminate:', r['ok']) + # unknown task id only revokes + r = control.revoke(Mock(), uuid(), terminate=True) + self.assertIn('tasks unknown', r['ok']) + finally: + worker_state.reserved_requests.discard(request) + + def test_autoscale(self): + self.panel.state.consumer = Mock() + self.panel.state.consumer.controller = Mock() + sc = self.panel.state.consumer.controller.autoscaler = Mock() + sc.update.return_value = 10, 2 + m = {'method': 'autoscale', + 'destination': hostname, + 'arguments': {'max': '10', 'min': '2'}} + r = self.panel.handle_message(m, None) + self.assertIn('ok', r) + + self.panel.state.consumer.controller.autoscaler = None + r = self.panel.handle_message(m, None) + self.assertIn('error', r) + + def test_ping(self): + m = {'method': 'ping', + 'destination': hostname} + r = self.panel.handle_message(m, None) + self.assertEqual(r, {'ok': 'pong'}) + + def test_shutdown(self): + m = {'method': 'shutdown', + 'destination': hostname} + with self.assertRaises(SystemExit): + self.panel.handle_message(m, None) + + def test_panel_reply(self): + + replies = [] + + class _Node(pidbox.Node): + + def reply(self, data, exchange, routing_key, **kwargs): + replies.append(data) + + panel = _Node(hostname=hostname, + state=self.create_state(consumer=Consumer(self.app)), + handlers=Panel.data, + mailbox=self.app.control.mailbox) + r = panel.dispatch('ping', reply_to={'exchange': 'x', + 'routing_key': 
'x'}) + self.assertEqual(r, {'ok': 'pong'}) + self.assertDictEqual(replies[0], {panel.hostname: {'ok': 'pong'}}) + + def test_pool_restart(self): + consumer = Consumer(self.app) + consumer.controller = _WC(app=self.app) + consumer.controller.consumer = consumer + consumer.controller.pool.restart = Mock() + consumer.reset_rate_limits = Mock(name='reset_rate_limits()') + consumer.update_strategies = Mock(name='update_strategies()') + consumer.event_dispatcher = Mock(name='evd') + panel = self.create_panel(consumer=consumer) + assert panel.state.consumer.controller.consumer is consumer + panel.app = self.app + _import = panel.app.loader.import_from_cwd = Mock() + _reload = Mock() + + with self.assertRaises(ValueError): + panel.handle('pool_restart', {'reloader': _reload}) + + self.app.conf.CELERYD_POOL_RESTARTS = True + panel.handle('pool_restart', {'reloader': _reload}) + self.assertTrue(consumer.controller.pool.restart.called) + consumer.reset_rate_limits.assert_called_with() + consumer.update_strategies.assert_called_with() + self.assertFalse(_reload.called) + self.assertFalse(_import.called) + + def test_pool_restart_import_modules(self): + consumer = Consumer(self.app) + consumer.controller = _WC(app=self.app) + consumer.controller.consumer = consumer + consumer.controller.pool.restart = Mock() + consumer.reset_rate_limits = Mock(name='reset_rate_limits()') + consumer.update_strategies = Mock(name='update_strategies()') + panel = self.create_panel(consumer=consumer) + panel.app = self.app + assert panel.state.consumer.controller.consumer is consumer + _import = consumer.controller.app.loader.import_from_cwd = Mock() + _reload = Mock() + + self.app.conf.CELERYD_POOL_RESTARTS = True + panel.handle('pool_restart', {'modules': ['foo', 'bar'], + 'reloader': _reload}) + + self.assertTrue(consumer.controller.pool.restart.called) + consumer.reset_rate_limits.assert_called_with() + consumer.update_strategies.assert_called_with() + self.assertFalse(_reload.called) + self.assertItemsEqual( + [call('bar'), call('foo')], + _import.call_args_list, + ) + + def test_pool_restart_reload_modules(self): + consumer = Consumer(self.app) + consumer.controller = _WC(app=self.app) + consumer.controller.consumer = consumer + consumer.controller.pool.restart = Mock() + consumer.reset_rate_limits = Mock(name='reset_rate_limits()') + consumer.update_strategies = Mock(name='update_strategies()') + panel = self.create_panel(consumer=consumer) + panel.app = self.app + _import = panel.app.loader.import_from_cwd = Mock() + _reload = Mock() + + self.app.conf.CELERYD_POOL_RESTARTS = True + with patch.dict(sys.modules, {'foo': None}): + panel.handle('pool_restart', {'modules': ['foo'], + 'reload': False, + 'reloader': _reload}) + + self.assertTrue(consumer.controller.pool.restart.called) + self.assertFalse(_reload.called) + self.assertFalse(_import.called) + + _import.reset_mock() + _reload.reset_mock() + consumer.controller.pool.restart.reset_mock() + + panel.handle('pool_restart', {'modules': ['foo'], + 'reload': True, + 'reloader': _reload}) + + self.assertTrue(consumer.controller.pool.restart.called) + self.assertTrue(_reload.called) + self.assertFalse(_import.called) diff --git a/celery/tests/worker/test_heartbeat.py b/celery/tests/worker/test_heartbeat.py new file mode 100644 index 0000000..5568e4e --- /dev/null +++ b/celery/tests/worker/test_heartbeat.py @@ -0,0 +1,73 @@ +from __future__ import absolute_import + +from celery.worker.heartbeat import Heart +from celery.tests.case import AppCase + + +class 
MockDispatcher(object): + heart = None + next_iter = 0 + + def __init__(self): + self.sent = [] + self.on_enabled = set() + self.on_disabled = set() + self.enabled = True + + def send(self, msg, **_fields): + self.sent.append(msg) + if self.heart: + if self.next_iter > 10: + self.heart._shutdown.set() + self.next_iter += 1 + + +class MockDispatcherRaising(object): + + def send(self, msg): + if msg == 'worker-offline': + raise Exception('foo') + + +class MockTimer(object): + + def call_repeatedly(self, secs, fun, args=(), kwargs={}): + + class entry(tuple): + cancelled = False + + def cancel(self): + self.cancelled = True + + return entry((secs, fun, args, kwargs)) + + def cancel(self, entry): + entry.cancel() + + +class test_Heart(AppCase): + + def test_start_stop(self): + timer = MockTimer() + eventer = MockDispatcher() + h = Heart(timer, eventer, interval=1) + h.start() + self.assertTrue(h.tref) + h.stop() + self.assertIsNone(h.tref) + h.stop() + + def test_start_when_disabled(self): + timer = MockTimer() + eventer = MockDispatcher() + eventer.enabled = False + h = Heart(timer, eventer) + h.start() + self.assertFalse(h.tref) + + def test_stop_when_disabled(self): + timer = MockTimer() + eventer = MockDispatcher() + eventer.enabled = False + h = Heart(timer, eventer) + h.stop() diff --git a/celery/tests/worker/test_hub.py b/celery/tests/worker/test_hub.py new file mode 100644 index 0000000..4e9e490 --- /dev/null +++ b/celery/tests/worker/test_hub.py @@ -0,0 +1,341 @@ +from __future__ import absolute_import + +from kombu.async import Hub, READ, WRITE, ERR +from kombu.async.debug import callback_for, repr_flag, _rcb +from kombu.async.semaphore import DummyLock, LaxBoundedSemaphore + +from celery.five import range +from celery.tests.case import Case, Mock, call, patch + + +class File(object): + + def __init__(self, fd): + self.fd = fd + + def fileno(self): + return self.fd + + def __eq__(self, other): + if isinstance(other, File): + return self.fd == other.fd + return NotImplemented + + def __hash__(self): + return hash(self.fd) + + +class test_DummyLock(Case): + + def test_context(self): + mutex = DummyLock() + with mutex: + pass + + +class test_LaxBoundedSemaphore(Case): + + def test_acquire_release(self): + x = LaxBoundedSemaphore(2) + + c1 = Mock() + x.acquire(c1, 1) + self.assertEqual(x.value, 1) + c1.assert_called_with(1) + + c2 = Mock() + x.acquire(c2, 2) + self.assertEqual(x.value, 0) + c2.assert_called_with(2) + + c3 = Mock() + x.acquire(c3, 3) + self.assertEqual(x.value, 0) + self.assertFalse(c3.called) + + x.release() + self.assertEqual(x.value, 0) + x.release() + self.assertEqual(x.value, 1) + x.release() + self.assertEqual(x.value, 2) + c3.assert_called_with(3) + + def test_bounded(self): + x = LaxBoundedSemaphore(2) + for i in range(100): + x.release() + self.assertEqual(x.value, 2) + + def test_grow_shrink(self): + x = LaxBoundedSemaphore(1) + self.assertEqual(x.initial_value, 1) + cb1 = Mock() + x.acquire(cb1, 1) + cb1.assert_called_with(1) + self.assertEqual(x.value, 0) + + cb2 = Mock() + x.acquire(cb2, 2) + self.assertFalse(cb2.called) + self.assertEqual(x.value, 0) + + cb3 = Mock() + x.acquire(cb3, 3) + self.assertFalse(cb3.called) + + x.grow(2) + cb2.assert_called_with(2) + cb3.assert_called_with(3) + self.assertEqual(x.value, 2) + self.assertEqual(x.initial_value, 3) + + self.assertFalse(x._waiting) + x.grow(3) + for i in range(x.initial_value): + self.assertTrue(x.acquire(Mock())) + self.assertFalse(x.acquire(Mock())) + x.clear() + + x.shrink(3) + for i in 
range(x.initial_value): + self.assertTrue(x.acquire(Mock())) + self.assertFalse(x.acquire(Mock())) + self.assertEqual(x.value, 0) + + for i in range(100): + x.release() + self.assertEqual(x.value, x.initial_value) + + def test_clear(self): + x = LaxBoundedSemaphore(10) + for i in range(11): + x.acquire(Mock()) + self.assertTrue(x._waiting) + self.assertEqual(x.value, 0) + + x.clear() + self.assertFalse(x._waiting) + self.assertEqual(x.value, x.initial_value) + + +class test_Hub(Case): + + def test_repr_flag(self): + self.assertEqual(repr_flag(READ), 'R') + self.assertEqual(repr_flag(WRITE), 'W') + self.assertEqual(repr_flag(ERR), '!') + self.assertEqual(repr_flag(READ | WRITE), 'RW') + self.assertEqual(repr_flag(READ | ERR), 'R!') + self.assertEqual(repr_flag(WRITE | ERR), 'W!') + self.assertEqual(repr_flag(READ | WRITE | ERR), 'RW!') + + def test_repr_callback_rcb(self): + + def f(): + pass + + self.assertEqual(_rcb(f), f.__name__) + self.assertEqual(_rcb('foo'), 'foo') + + @patch('kombu.async.hub.poll') + def test_start_stop(self, poll): + hub = Hub() + poll.assert_called_with() + + poller = hub.poller + hub.stop() + hub.close() + poller.close.assert_called_with() + + def test_fire_timers(self): + hub = Hub() + hub.timer = Mock() + hub.timer._queue = [] + self.assertEqual(hub.fire_timers(min_delay=42.324, + max_delay=32.321), 32.321) + + hub.timer._queue = [1] + hub.scheduler = iter([(3.743, None)]) + self.assertEqual(hub.fire_timers(), 3.743) + + e1, e2, e3 = Mock(), Mock(), Mock() + entries = [e1, e2, e3] + + reset = lambda: [m.reset() for m in [e1, e2, e3]] + + def se(): + while 1: + while entries: + yield None, entries.pop() + yield 3.982, None + hub.scheduler = se() + + self.assertEqual(hub.fire_timers(max_timers=10), 3.982) + for E in [e3, e2, e1]: + E.assert_called_with() + reset() + + entries[:] = [Mock() for _ in range(11)] + keep = list(entries) + self.assertEqual(hub.fire_timers(max_timers=10, min_delay=1.13), 1.13) + for E in reversed(keep[1:]): + E.assert_called_with() + reset() + self.assertEqual(hub.fire_timers(max_timers=10), 3.982) + keep[0].assert_called_with() + + def test_fire_timers_raises(self): + hub = Hub() + eback = Mock() + eback.side_effect = KeyError('foo') + hub.timer = Mock() + hub.scheduler = iter([(0, eback)]) + with self.assertRaises(KeyError): + hub.fire_timers(propagate=(KeyError, )) + + eback.side_effect = ValueError('foo') + hub.scheduler = iter([(0, eback)]) + with patch('kombu.async.hub.logger') as logger: + with self.assertRaises(StopIteration): + hub.fire_timers() + self.assertTrue(logger.error.called) + + def test_add_raises_ValueError(self): + hub = Hub() + hub.poller = Mock(name='hub.poller') + hub.poller.register.side_effect = ValueError() + hub._discard = Mock(name='hub.discard') + with self.assertRaises(ValueError): + hub.add(2, Mock(), READ) + hub._discard.assert_called_with(2) + + def test_repr_active(self): + hub = Hub() + hub.readers = {1: Mock(), 2: Mock()} + hub.writers = {3: Mock(), 4: Mock()} + for value in list(hub.readers.values()) + list(hub.writers.values()): + value.__name__ = 'mock' + self.assertTrue(hub.repr_active()) + + def test_repr_events(self): + hub = Hub() + hub.readers = {6: Mock(), 7: Mock(), 8: Mock()} + hub.writers = {9: Mock()} + for value in list(hub.readers.values()) + list(hub.writers.values()): + value.__name__ = 'mock' + self.assertTrue(hub.repr_events([ + (6, READ), + (7, ERR), + (8, READ | ERR), + (9, WRITE), + (10, 13213), + ])) + + def test_callback_for(self): + hub = Hub() + reader, writer = Mock(), 
Mock() + hub.readers = {6: reader} + hub.writers = {7: writer} + + self.assertEqual(callback_for(hub, 6, READ), reader) + self.assertEqual(callback_for(hub, 7, WRITE), writer) + with self.assertRaises(KeyError): + callback_for(hub, 6, WRITE) + self.assertEqual(callback_for(hub, 6, WRITE, 'foo'), 'foo') + + def test_add_remove_readers(self): + hub = Hub() + P = hub.poller = Mock() + + read_A = Mock() + read_B = Mock() + hub.add_reader(10, read_A, 10) + hub.add_reader(File(11), read_B, 11) + + P.register.assert_has_calls([ + call(10, hub.READ | hub.ERR), + call(11, hub.READ | hub.ERR), + ], any_order=True) + + self.assertEqual(hub.readers[10], (read_A, (10, ))) + self.assertEqual(hub.readers[11], (read_B, (11, ))) + + hub.remove(10) + self.assertNotIn(10, hub.readers) + hub.remove(File(11)) + self.assertNotIn(11, hub.readers) + P.unregister.assert_has_calls([ + call(10), call(11), + ]) + + def test_can_remove_unknown_fds(self): + hub = Hub() + hub.poller = Mock() + hub.remove(30) + hub.remove(File(301)) + + def test_remove__unregister_raises(self): + hub = Hub() + hub.poller = Mock() + hub.poller.unregister.side_effect = OSError() + + hub.remove(313) + + def test_add_writers(self): + hub = Hub() + P = hub.poller = Mock() + + write_A = Mock() + write_B = Mock() + hub.add_writer(20, write_A) + hub.add_writer(File(21), write_B) + + P.register.assert_has_calls([ + call(20, hub.WRITE), + call(21, hub.WRITE), + ], any_order=True) + + self.assertEqual(hub.writers[20], (write_A, ())) + self.assertEqual(hub.writers[21], (write_B, ())) + + hub.remove(20) + self.assertNotIn(20, hub.writers) + hub.remove(File(21)) + self.assertNotIn(21, hub.writers) + P.unregister.assert_has_calls([ + call(20), call(21), + ]) + + def test_enter__exit(self): + hub = Hub() + P = hub.poller = Mock() + on_close = Mock() + hub.on_close.add(on_close) + + try: + read_A = Mock() + read_B = Mock() + hub.add_reader(10, read_A) + hub.add_reader(File(11), read_B) + write_A = Mock() + write_B = Mock() + hub.add_writer(20, write_A) + hub.add_writer(File(21), write_B) + self.assertTrue(hub.readers) + self.assertTrue(hub.writers) + finally: + assert hub.poller + hub.close() + self.assertFalse(hub.readers) + self.assertFalse(hub.writers) + + P.unregister.assert_has_calls([ + call(10), call(11), call(20), call(21), + ], any_order=True) + + on_close.assert_called_with(hub) + + def test_scheduler_property(self): + hub = Hub(timer=[1, 2, 3]) + self.assertEqual(list(hub.scheduler), [1, 2, 3]) diff --git a/celery/tests/worker/test_loops.py b/celery/tests/worker/test_loops.py new file mode 100644 index 0000000..00c5d96 --- /dev/null +++ b/celery/tests/worker/test_loops.py @@ -0,0 +1,416 @@ +from __future__ import absolute_import + +import socket + +from kombu.async import Hub, READ, WRITE, ERR + +from celery.bootsteps import CLOSE, RUN +from celery.exceptions import InvalidTaskError, WorkerShutdown, WorkerTerminate +from celery.five import Empty +from celery.worker import state +from celery.worker.consumer import Consumer +from celery.worker.loops import asynloop, synloop + +from celery.tests.case import AppCase, Mock, body_from_sig + + +class X(object): + + def __init__(self, app, heartbeat=None, on_task_message=None): + hub = Hub() + ( + self.obj, + self.connection, + self.consumer, + self.blueprint, + self.hub, + self.qos, + self.heartbeat, + self.clock, + ) = self.args = [Mock(name='obj'), + Mock(name='connection'), + Mock(name='consumer'), + Mock(name='blueprint'), + hub, + Mock(name='qos'), + heartbeat, + Mock(name='clock')] + 
self.connection.supports_heartbeats = True + self.connection.get_heartbeat_interval.side_effect = ( + lambda: self.heartbeat + ) + self.consumer.callbacks = [] + self.obj.strategies = {} + self.connection.connection_errors = (socket.error, ) + self.hub.readers = {} + self.hub.writers = {} + self.hub.consolidate = set() + self.hub.timer = Mock(name='hub.timer') + self.hub.timer._queue = [Mock()] + self.hub.fire_timers = Mock(name='hub.fire_timers') + self.hub.fire_timers.return_value = 1.7 + self.hub.poller = Mock(name='hub.poller') + self.hub.close = Mock(name='hub.close()') # asynloop calls hub.close + self.Hub = self.hub + self.blueprint.state = RUN + # need this for create_task_handler + _consumer = Consumer(Mock(), timer=Mock(), app=app) + _consumer.on_task_message = on_task_message or [] + self.obj.create_task_handler = _consumer.create_task_handler + self.on_unknown_message = self.obj.on_unknown_message = Mock( + name='on_unknown_message', + ) + _consumer.on_unknown_message = self.on_unknown_message + self.on_unknown_task = self.obj.on_unknown_task = Mock( + name='on_unknown_task', + ) + _consumer.on_unknown_task = self.on_unknown_task + self.on_invalid_task = self.obj.on_invalid_task = Mock( + name='on_invalid_task', + ) + _consumer.on_invalid_task = self.on_invalid_task + _consumer.strategies = self.obj.strategies + + def timeout_then_error(self, mock): + + def first(*args, **kwargs): + mock.side_effect = socket.error() + self.connection.more_to_read = False + raise socket.timeout() + mock.side_effect = first + + def close_then_error(self, mock=None, mod=0, exc=None): + mock = Mock() if mock is None else mock + + def first(*args, **kwargs): + if not mod or mock.call_count > mod: + self.close() + self.connection.more_to_read = False + raise (socket.error() if exc is None else exc) + mock.side_effect = first + return mock + + def close(self, *args, **kwargs): + self.blueprint.state = CLOSE + + def closer(self, mock=None, mod=0): + mock = Mock() if mock is None else mock + + def closing(*args, **kwargs): + if not mod or mock.call_count >= mod: + self.close() + mock.side_effect = closing + return mock + + +def get_task_callback(*args, **kwargs): + x = X(*args, **kwargs) + x.blueprint.state = CLOSE + asynloop(*x.args) + return x, x.consumer.callbacks[0] + + +class test_asynloop(AppCase): + + def setup(self): + + @self.app.task(shared=False) + def add(x, y): + return x + y + self.add = add + + def test_setup_heartbeat(self): + x = X(self.app, heartbeat=10) + x.hub.call_repeatedly = Mock(name='x.hub.call_repeatedly()') + x.blueprint.state = CLOSE + asynloop(*x.args) + x.consumer.consume.assert_called_with() + x.obj.on_ready.assert_called_with() + x.hub.call_repeatedly.assert_called_with( + 10 / 2.0, x.connection.heartbeat_check, 2.0, + ) + + def task_context(self, sig, **kwargs): + x, on_task = get_task_callback(self.app, **kwargs) + body = body_from_sig(self.app, sig) + message = Mock() + strategy = x.obj.strategies[sig.task] = Mock() + return x, on_task, body, message, strategy + + def test_on_task_received(self): + _, on_task, body, msg, strategy = self.task_context(self.add.s(2, 2)) + on_task(body, msg) + strategy.assert_called_with( + msg, body, msg.ack_log_error, msg.reject_log_error, [], + ) + + def test_on_task_received_executes_on_task_message(self): + cbs = [Mock(), Mock(), Mock()] + _, on_task, body, msg, strategy = self.task_context( + self.add.s(2, 2), on_task_message=cbs, + ) + on_task(body, msg) + strategy.assert_called_with( + msg, body, msg.ack_log_error, 
msg.reject_log_error, cbs, + ) + + def test_on_task_message_missing_name(self): + x, on_task, body, msg, strategy = self.task_context(self.add.s(2, 2)) + body.pop('task') + on_task(body, msg) + x.on_unknown_message.assert_called_with(body, msg) + + def test_on_task_not_registered(self): + x, on_task, body, msg, strategy = self.task_context(self.add.s(2, 2)) + exc = strategy.side_effect = KeyError(self.add.name) + on_task(body, msg) + x.on_unknown_task.assert_called_with(body, msg, exc) + + def test_on_task_InvalidTaskError(self): + x, on_task, body, msg, strategy = self.task_context(self.add.s(2, 2)) + exc = strategy.side_effect = InvalidTaskError() + on_task(body, msg) + x.on_invalid_task.assert_called_with(body, msg, exc) + + def test_should_terminate(self): + x = X(self.app) + # XXX why aren't the errors propagated?!? + state.should_terminate = True + try: + with self.assertRaises(WorkerTerminate): + asynloop(*x.args) + finally: + state.should_terminate = False + + def test_should_terminate_hub_close_raises(self): + x = X(self.app) + # XXX why aren't the errors propagated?!? + state.should_terminate = True + x.hub.close.side_effect = MemoryError() + try: + with self.assertRaises(WorkerTerminate): + asynloop(*x.args) + finally: + state.should_terminate = False + + def test_should_stop(self): + x = X(self.app) + state.should_stop = True + try: + with self.assertRaises(WorkerShutdown): + asynloop(*x.args) + finally: + state.should_stop = False + + def test_updates_qos(self): + x = X(self.app) + x.qos.prev = 3 + x.qos.value = 3 + x.hub.on_tick.add(x.closer(mod=2)) + x.hub.timer._queue = [1] + asynloop(*x.args) + self.assertFalse(x.qos.update.called) + + x = X(self.app) + x.qos.prev = 1 + x.qos.value = 6 + x.hub.on_tick.add(x.closer(mod=2)) + asynloop(*x.args) + x.qos.update.assert_called_with() + x.hub.fire_timers.assert_called_with(propagate=(socket.error, )) + + def test_poll_empty(self): + x = X(self.app) + x.hub.readers = {6: Mock()} + x.hub.timer._queue = [1] + x.close_then_error(x.hub.poller.poll) + x.hub.fire_timers.return_value = 33.37 + poller = x.hub.poller + poller.poll.return_value = [] + with self.assertRaises(socket.error): + asynloop(*x.args) + poller.poll.assert_called_with(33.37) + + def test_poll_readable(self): + x = X(self.app) + reader = Mock(name='reader') + x.hub.add_reader(6, reader, 6) + x.hub.on_tick.add(x.close_then_error(Mock(name='tick'), mod=4)) + poller = x.hub.poller + poller.poll.return_value = [(6, READ)] + with self.assertRaises(socket.error): + asynloop(*x.args) + reader.assert_called_with(6) + self.assertTrue(poller.poll.called) + + def test_poll_readable_raises_Empty(self): + x = X(self.app) + reader = Mock(name='reader') + x.hub.add_reader(6, reader, 6) + x.hub.on_tick.add(x.close_then_error(Mock(name='tick'), 2)) + poller = x.hub.poller + poller.poll.return_value = [(6, READ)] + reader.side_effect = Empty() + with self.assertRaises(socket.error): + asynloop(*x.args) + reader.assert_called_with(6) + self.assertTrue(poller.poll.called) + + def test_poll_writable(self): + x = X(self.app) + writer = Mock(name='writer') + x.hub.add_writer(6, writer, 6) + x.hub.on_tick.add(x.close_then_error(Mock(name='tick'), 2)) + poller = x.hub.poller + poller.poll.return_value = [(6, WRITE)] + with self.assertRaises(socket.error): + asynloop(*x.args) + writer.assert_called_with(6) + self.assertTrue(poller.poll.called) + + def test_poll_writable_none_registered(self): + x = X(self.app) + writer = Mock(name='writer') + x.hub.add_writer(6, writer, 6) + 
x.hub.on_tick.add(x.close_then_error(Mock(name='tick'), 2)) + poller = x.hub.poller + poller.poll.return_value = [(7, WRITE)] + with self.assertRaises(socket.error): + asynloop(*x.args) + self.assertTrue(poller.poll.called) + + def test_poll_unknown_event(self): + x = X(self.app) + writer = Mock(name='reader') + x.hub.add_writer(6, writer, 6) + x.hub.on_tick.add(x.close_then_error(Mock(name='tick'), 2)) + poller = x.hub.poller + poller.poll.return_value = [(6, 0)] + with self.assertRaises(socket.error): + asynloop(*x.args) + self.assertTrue(poller.poll.called) + + def test_poll_keep_draining_disabled(self): + x = X(self.app) + x.hub.writers = {6: Mock()} + poll = x.hub.poller.poll + + def se(*args, **kwargs): + poll.side_effect = socket.error() + poll.side_effect = se + + poller = x.hub.poller + poll.return_value = [(6, 0)] + with self.assertRaises(socket.error): + asynloop(*x.args) + self.assertTrue(poller.poll.called) + + def test_poll_err_writable(self): + x = X(self.app) + writer = Mock(name='writer') + x.hub.add_writer(6, writer, 6, 48) + x.hub.on_tick.add(x.close_then_error(Mock(), 2)) + poller = x.hub.poller + poller.poll.return_value = [(6, ERR)] + with self.assertRaises(socket.error): + asynloop(*x.args) + writer.assert_called_with(6, 48) + self.assertTrue(poller.poll.called) + + def test_poll_write_generator(self): + x = X(self.app) + x.hub.remove = Mock(name='hub.remove()') + + def Gen(): + yield 1 + yield 2 + gen = Gen() + + x.hub.add_writer(6, gen) + x.hub.on_tick.add(x.close_then_error(Mock(name='tick'), 2)) + x.hub.poller.poll.return_value = [(6, WRITE)] + with self.assertRaises(socket.error): + asynloop(*x.args) + self.assertTrue(gen.gi_frame.f_lasti != -1) + self.assertFalse(x.hub.remove.called) + + def test_poll_write_generator_stopped(self): + x = X(self.app) + + def Gen(): + raise StopIteration() + yield + gen = Gen() + x.hub.add_writer(6, gen) + x.hub.on_tick.add(x.close_then_error(Mock(name='tick'), 2)) + x.hub.poller.poll.return_value = [(6, WRITE)] + x.hub.remove = Mock(name='hub.remove()') + with self.assertRaises(socket.error): + asynloop(*x.args) + self.assertIsNone(gen.gi_frame) + + def test_poll_write_generator_raises(self): + x = X(self.app) + + def Gen(): + raise ValueError('foo') + yield + gen = Gen() + x.hub.add_writer(6, gen) + x.hub.remove = Mock(name='hub.remove()') + x.hub.on_tick.add(x.close_then_error(Mock(name='tick'), 2)) + x.hub.poller.poll.return_value = [(6, WRITE)] + with self.assertRaises(ValueError): + asynloop(*x.args) + self.assertIsNone(gen.gi_frame) + x.hub.remove.assert_called_with(6) + + def test_poll_err_readable(self): + x = X(self.app) + reader = Mock(name='reader') + x.hub.add_reader(6, reader, 6, 24) + x.hub.on_tick.add(x.close_then_error(Mock(), 2)) + poller = x.hub.poller + poller.poll.return_value = [(6, ERR)] + with self.assertRaises(socket.error): + asynloop(*x.args) + reader.assert_called_with(6, 24) + self.assertTrue(poller.poll.called) + + def test_poll_raises_ValueError(self): + x = X(self.app) + x.hub.readers = {6: Mock()} + poller = x.hub.poller + x.close_then_error(poller.poll, exc=ValueError) + asynloop(*x.args) + self.assertTrue(poller.poll.called) + + +class test_synloop(AppCase): + + def test_timeout_ignored(self): + x = X(self.app) + x.timeout_then_error(x.connection.drain_events) + with self.assertRaises(socket.error): + synloop(*x.args) + self.assertEqual(x.connection.drain_events.call_count, 2) + + def test_updates_qos_when_changed(self): + x = X(self.app) + x.qos.prev = 2 + x.qos.value = 2 + 
x.timeout_then_error(x.connection.drain_events) + with self.assertRaises(socket.error): + synloop(*x.args) + self.assertFalse(x.qos.update.called) + + x.qos.value = 4 + x.timeout_then_error(x.connection.drain_events) + with self.assertRaises(socket.error): + synloop(*x.args) + x.qos.update.assert_called_with() + + def test_ignores_socket_errors_when_closed(self): + x = X(self.app) + x.close_then_error(x.connection.drain_events) + self.assertIsNone(synloop(*x.args)) diff --git a/celery/tests/worker/test_request.py b/celery/tests/worker/test_request.py new file mode 100644 index 0000000..488ea72 --- /dev/null +++ b/celery/tests/worker/test_request.py @@ -0,0 +1,962 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals + +import anyjson +import os +import signal +import socket +import sys + +from datetime import datetime, timedelta + +from billiard.einfo import ExceptionInfo +from kombu.transport.base import Message +from kombu.utils.encoding import from_utf8, default_encode + +from celery import states +from celery.app.trace import ( + trace_task, + _trace_task_ret, + TraceInfo, + mro_lookup, + build_tracer, + setup_worker_optimizations, + reset_worker_optimizations, +) +from celery.concurrency.base import BasePool +from celery.exceptions import ( + Ignore, + InvalidTaskError, + Retry, + TaskRevokedError, + Terminated, + WorkerLostError, +) +from celery.five import keys, monotonic +from celery.signals import task_revoked +from celery.utils import uuid +from celery.worker import job as module +from celery.worker.job import Request, logger as req_logger +from celery.worker.state import revoked + +from celery.tests.case import ( + AppCase, + Case, + Mock, + SkipTest, + assert_signal_called, + body_from_sig, + patch, +) + + +class test_mro_lookup(Case): + + def test_order(self): + + class A(object): + pass + + class B(A): + pass + + class C(B): + pass + + class D(C): + + @classmethod + def mro(cls): + return () + + A.x = 10 + self.assertEqual(mro_lookup(C, 'x'), A) + self.assertIsNone(mro_lookup(C, 'x', stop=(A, ))) + B.x = 10 + self.assertEqual(mro_lookup(C, 'x'), B) + C.x = 10 + self.assertEqual(mro_lookup(C, 'x'), C) + self.assertIsNone(mro_lookup(D, 'x')) + + +def jail(app, task_id, name, args, kwargs): + request = {'id': task_id} + task = app.tasks[name] + task.__trace__ = None # rebuild + return trace_task( + task, task_id, args, kwargs, request=request, eager=False, app=app, + ) + + +class test_default_encode(AppCase): + + def setup(self): + if sys.version_info >= (3, 0): + raise SkipTest('py3k: not relevant') + + def test_jython(self): + prev, sys.platform = sys.platform, 'java 1.6.1' + try: + self.assertEqual(default_encode(bytes('foo')), 'foo') + finally: + sys.platform = prev + + def test_cpython(self): + prev, sys.platform = sys.platform, 'darwin' + gfe, sys.getfilesystemencoding = ( + sys.getfilesystemencoding, + lambda: 'utf-8', + ) + try: + self.assertEqual(default_encode(bytes('foo')), 'foo') + finally: + sys.platform = prev + sys.getfilesystemencoding = gfe + + +class test_Retry(AppCase): + + def test_retry_semipredicate(self): + try: + raise Exception('foo') + except Exception as exc: + ret = Retry('Retrying task', exc) + self.assertEqual(ret.exc, exc) + + +class test_trace_task(AppCase): + + def setup(self): + + @self.app.task(shared=False) + def mytask(i, **kwargs): + return i ** i + self.mytask = mytask + + @self.app.task(shared=False) + def mytask_raising(i): + raise KeyError(i) + self.mytask_raising = mytask_raising + + 
@patch('celery.app.trace._logger') + def test_process_cleanup_fails(self, _logger): + self.mytask.backend = Mock() + self.mytask.backend.process_cleanup = Mock(side_effect=KeyError()) + tid = uuid() + ret = jail(self.app, tid, self.mytask.name, [2], {}) + self.assertEqual(ret, 4) + self.assertTrue(self.mytask.backend.store_result.called) + self.assertIn('Process cleanup failed', _logger.error.call_args[0][0]) + + def test_process_cleanup_BaseException(self): + self.mytask.backend = Mock() + self.mytask.backend.process_cleanup = Mock(side_effect=SystemExit()) + with self.assertRaises(SystemExit): + jail(self.app, uuid(), self.mytask.name, [2], {}) + + def test_execute_jail_success(self): + ret = jail(self.app, uuid(), self.mytask.name, [2], {}) + self.assertEqual(ret, 4) + + def test_marked_as_started(self): + _started = [] + + def store_result(tid, meta, state, **kwars): + if state == states.STARTED: + _started.append(tid) + self.mytask.backend.store_result = Mock(name='store_result') + self.mytask.backend.store_result.side_effect = store_result + self.mytask.track_started = True + + tid = uuid() + jail(self.app, tid, self.mytask.name, [2], {}) + self.assertIn(tid, _started) + + self.mytask.ignore_result = True + tid = uuid() + jail(self.app, tid, self.mytask.name, [2], {}) + self.assertNotIn(tid, _started) + + def test_execute_jail_failure(self): + ret = jail( + self.app, uuid(), self.mytask_raising.name, [4], {}, + ) + self.assertIsInstance(ret, ExceptionInfo) + self.assertTupleEqual(ret.exception.args, (4, )) + + def test_execute_ignore_result(self): + + @self.app.task(shared=False, ignore_result=True) + def ignores_result(i): + return i ** i + + task_id = uuid() + ret = jail(self.app, task_id, ignores_result.name, [4], {}) + self.assertEqual(ret, 256) + self.assertFalse(self.app.AsyncResult(task_id).ready()) + + +class MockEventDispatcher(object): + + def __init__(self): + self.sent = [] + self.enabled = True + + def send(self, event, **fields): + self.sent.append(event) + + +class test_Request(AppCase): + + def setup(self): + + @self.app.task(shared=False) + def add(x, y, **kw_): + return x + y + self.add = add + + @self.app.task(shared=False) + def mytask(i, **kwargs): + return i ** i + self.mytask = mytask + + @self.app.task(shared=False) + def mytask_raising(i): + raise KeyError(i) + self.mytask_raising = mytask_raising + + def get_request(self, sig, Request=Request, **kwargs): + return Request( + body_from_sig(self.app, sig), + on_ack=Mock(), + eventer=Mock(), + app=self.app, + connection_errors=(socket.error, ), + task=sig.type, + **kwargs + ) + + def test_invalid_eta_raises_InvalidTaskError(self): + with self.assertRaises(InvalidTaskError): + self.get_request(self.add.s(2, 2).set(eta='12345')) + + def test_invalid_expires_raises_InvalidTaskError(self): + with self.assertRaises(InvalidTaskError): + self.get_request(self.add.s(2, 2).set(expires='12345')) + + def test_valid_expires_with_utc_makes_aware(self): + with patch('celery.worker.job.maybe_make_aware') as mma: + self.get_request(self.add.s(2, 2).set(expires=10)) + self.assertTrue(mma.called) + + def test_maybe_expire_when_expires_is_None(self): + req = self.get_request(self.add.s(2, 2)) + self.assertFalse(req.maybe_expire()) + + def test_on_retry_acks_if_late(self): + self.add.acks_late = True + req = self.get_request(self.add.s(2, 2)) + req.on_retry(Mock()) + req.on_ack.assert_called_with(req_logger, req.connection_errors) + + def test_on_failure_Termianted(self): + einfo = None + try: + raise Terminated('9') + except 
Terminated: + einfo = ExceptionInfo() + self.assertIsNotNone(einfo) + req = self.get_request(self.add.s(2, 2)) + req.on_failure(einfo) + req.eventer.send.assert_called_with( + 'task-revoked', + uuid=req.id, terminated=True, signum='9', expired=False, + ) + + def test_log_error_propagates_MemoryError(self): + einfo = None + try: + raise MemoryError() + except MemoryError: + einfo = ExceptionInfo(internal=True) + self.assertIsNotNone(einfo) + req = self.get_request(self.add.s(2, 2)) + with self.assertRaises(MemoryError): + req._log_error(einfo) + + def test_log_error_when_Ignore(self): + einfo = None + try: + raise Ignore() + except Ignore: + einfo = ExceptionInfo(internal=True) + self.assertIsNotNone(einfo) + req = self.get_request(self.add.s(2, 2)) + req._log_error(einfo) + req.on_ack.assert_called_with(req_logger, req.connection_errors) + + def test_tzlocal_is_cached(self): + req = self.get_request(self.add.s(2, 2)) + req._tzlocal = 'foo' + self.assertEqual(req.tzlocal, 'foo') + + def test_execute_magic_kwargs(self): + task = self.add.s(2, 2) + task.freeze() + req = self.get_request(task) + self.add.accept_magic_kwargs = True + pool = Mock() + req.execute_using_pool(pool) + self.assertTrue(pool.apply_async.called) + args = pool.apply_async.call_args[1]['args'] + self.assertEqual(args[0], task.task) + self.assertEqual(args[1], task.id) + self.assertEqual(args[2], task.args) + kwargs = args[3] + self.assertEqual(kwargs.get('task_name'), task.task) + + def xRequest(self, body=None, **kwargs): + body = dict({'task': self.mytask.name, + 'id': uuid(), + 'args': [1], + 'kwargs': {'f': 'x'}}, **body or {}) + return Request(body, app=self.app, **kwargs) + + def test_task_wrapper_repr(self): + self.assertTrue(repr(self.xRequest())) + + @patch('celery.worker.job.kwdict') + def test_kwdict(self, kwdict): + prev, module.NEEDS_KWDICT = module.NEEDS_KWDICT, True + try: + self.xRequest() + self.assertTrue(kwdict.called) + finally: + module.NEEDS_KWDICT = prev + + def test_sets_store_errors(self): + self.mytask.ignore_result = True + job = self.xRequest() + self.assertFalse(job.store_errors) + + self.mytask.store_errors_even_if_ignored = True + job = self.xRequest() + self.assertTrue(job.store_errors) + + def test_send_event(self): + job = self.xRequest() + job.eventer = MockEventDispatcher() + job.send_event('task-frobulated') + self.assertIn('task-frobulated', job.eventer.sent) + + def test_on_retry(self): + job = Request({ + 'task': self.mytask.name, + 'id': uuid(), + 'args': [1], + 'kwargs': {'f': 'x'}, + }, app=self.app) + job.eventer = MockEventDispatcher() + try: + raise Retry('foo', KeyError('moofoobar')) + except: + einfo = ExceptionInfo() + job.on_failure(einfo) + self.assertIn('task-retried', job.eventer.sent) + prev, module._does_info = module._does_info, False + try: + job.on_failure(einfo) + finally: + module._does_info = prev + einfo.internal = True + job.on_failure(einfo) + + def test_compat_properties(self): + job = Request({ + 'task': self.mytask.name, + 'id': uuid(), + 'args': [1], + 'kwargs': {'f': 'x'}, + }, app=self.app) + self.assertEqual(job.task_id, job.id) + self.assertEqual(job.task_name, job.name) + job.task_id = 'ID' + self.assertEqual(job.id, 'ID') + job.task_name = 'NAME' + self.assertEqual(job.name, 'NAME') + + def test_terminate__task_started(self): + pool = Mock() + signum = signal.SIGTERM + job = Request({ + 'task': self.mytask.name, + 'id': uuid(), + 'args': [1], + 'kwrgs': {'f': 'x'}, + }, app=self.app) + with assert_signal_called( + task_revoked, sender=job.task, 
request=job, + terminated=True, expired=False, signum=signum): + job.time_start = monotonic() + job.worker_pid = 313 + job.terminate(pool, signal='TERM') + pool.terminate_job.assert_called_with(job.worker_pid, signum) + + def test_terminate__task_reserved(self): + pool = Mock() + job = Request({ + 'task': self.mytask.name, + 'id': uuid(), + 'args': [1], + 'kwargs': {'f': 'x'}, + }, app=self.app) + job.time_start = None + job.terminate(pool, signal='TERM') + self.assertFalse(pool.terminate_job.called) + self.assertTupleEqual(job._terminate_on_ack, (pool, 15)) + job.terminate(pool, signal='TERM') + + def test_revoked_expires_expired(self): + job = Request({ + 'task': self.mytask.name, + 'id': uuid(), + 'args': [1], + 'kwargs': {'f': 'x'}, + 'expires': datetime.utcnow() - timedelta(days=1), + }, app=self.app) + with assert_signal_called( + task_revoked, sender=job.task, request=job, + terminated=False, expired=True, signum=None): + job.revoked() + self.assertIn(job.id, revoked) + self.assertEqual( + self.mytask.backend.get_status(job.id), + states.REVOKED, + ) + + def test_revoked_expires_not_expired(self): + job = self.xRequest({ + 'expires': datetime.utcnow() + timedelta(days=1), + }) + job.revoked() + self.assertNotIn(job.id, revoked) + self.assertNotEqual( + self.mytask.backend.get_status(job.id), + states.REVOKED, + ) + + def test_revoked_expires_ignore_result(self): + self.mytask.ignore_result = True + job = self.xRequest({ + 'expires': datetime.utcnow() - timedelta(days=1), + }) + job.revoked() + self.assertIn(job.id, revoked) + self.assertNotEqual( + self.mytask.backend.get_status(job.id), states.REVOKED, + ) + + def test_send_email(self): + app = self.app + mail_sent = [False] + + def mock_mail_admins(*args, **kwargs): + mail_sent[0] = True + + def get_ei(): + try: + raise KeyError('moofoobar') + except: + return ExceptionInfo() + + app.mail_admins = mock_mail_admins + self.mytask.send_error_emails = True + job = self.xRequest() + einfo = get_ei() + job.on_failure(einfo) + self.assertTrue(mail_sent[0]) + + einfo = get_ei() + mail_sent[0] = False + self.mytask.send_error_emails = False + job.on_failure(einfo) + self.assertFalse(mail_sent[0]) + + einfo = get_ei() + mail_sent[0] = False + self.mytask.send_error_emails = True + job.on_failure(einfo) + self.assertTrue(mail_sent[0]) + + def test_already_revoked(self): + job = self.xRequest() + job._already_revoked = True + self.assertTrue(job.revoked()) + + def test_revoked(self): + job = self.xRequest() + with assert_signal_called( + task_revoked, sender=job.task, request=job, + terminated=False, expired=False, signum=None): + revoked.add(job.id) + self.assertTrue(job.revoked()) + self.assertTrue(job._already_revoked) + self.assertTrue(job.acknowledged) + + def test_execute_does_not_execute_revoked(self): + job = self.xRequest() + revoked.add(job.id) + job.execute() + + def test_execute_acks_late(self): + self.mytask_raising.acks_late = True + job = self.xRequest({ + 'task': self.mytask_raising.name, + 'kwargs': {}, + }) + job.execute() + self.assertTrue(job.acknowledged) + job.execute() + + def test_execute_using_pool_does_not_execute_revoked(self): + job = self.xRequest() + revoked.add(job.id) + with self.assertRaises(TaskRevokedError): + job.execute_using_pool(None) + + def test_on_accepted_acks_early(self): + job = self.xRequest() + job.on_accepted(pid=os.getpid(), time_accepted=monotonic()) + self.assertTrue(job.acknowledged) + prev, module._does_debug = module._does_debug, False + try: + job.on_accepted(pid=os.getpid(), 
time_accepted=monotonic()) + finally: + module._does_debug = prev + + def test_on_accepted_acks_late(self): + job = self.xRequest() + self.mytask.acks_late = True + job.on_accepted(pid=os.getpid(), time_accepted=monotonic()) + self.assertFalse(job.acknowledged) + + def test_on_accepted_terminates(self): + signum = signal.SIGTERM + pool = Mock() + job = self.xRequest() + with assert_signal_called( + task_revoked, sender=job.task, request=job, + terminated=True, expired=False, signum=signum): + job.terminate(pool, signal='TERM') + self.assertFalse(pool.terminate_job.call_count) + job.on_accepted(pid=314, time_accepted=monotonic()) + pool.terminate_job.assert_called_with(314, signum) + + def test_on_success_acks_early(self): + job = self.xRequest() + job.time_start = 1 + job.on_success(42) + prev, module._does_info = module._does_info, False + try: + job.on_success(42) + self.assertFalse(job.acknowledged) + finally: + module._does_info = prev + + def test_on_success_BaseException(self): + job = self.xRequest() + job.time_start = 1 + with self.assertRaises(SystemExit): + try: + raise SystemExit() + except SystemExit: + job.on_success(ExceptionInfo()) + else: + assert False + + def test_on_success_eventer(self): + job = self.xRequest() + job.time_start = 1 + job.eventer = Mock() + job.eventer.send = Mock() + job.on_success(42) + self.assertTrue(job.eventer.send.called) + + def test_on_success_when_failure(self): + job = self.xRequest() + job.time_start = 1 + job.on_failure = Mock() + try: + raise KeyError('foo') + except Exception: + job.on_success(ExceptionInfo()) + self.assertTrue(job.on_failure.called) + + def test_on_success_acks_late(self): + job = self.xRequest() + job.time_start = 1 + self.mytask.acks_late = True + job.on_success(42) + self.assertTrue(job.acknowledged) + + def test_on_failure_WorkerLostError(self): + + def get_ei(): + try: + raise WorkerLostError('do re mi') + except WorkerLostError: + return ExceptionInfo() + + job = self.xRequest() + exc_info = get_ei() + job.on_failure(exc_info) + self.assertEqual( + self.mytask.backend.get_status(job.id), states.FAILURE, + ) + + self.mytask.ignore_result = True + exc_info = get_ei() + job = self.xRequest() + job.on_failure(exc_info) + self.assertEqual( + self.mytask.backend.get_status(job.id), states.PENDING, + ) + + def test_on_failure_acks_late(self): + job = self.xRequest() + job.time_start = 1 + self.mytask.acks_late = True + try: + raise KeyError('foo') + except KeyError: + exc_info = ExceptionInfo() + job.on_failure(exc_info) + self.assertTrue(job.acknowledged) + + def test_from_message_invalid_kwargs(self): + body = dict(task=self.mytask.name, id=1, args=(), kwargs='foo') + with self.assertRaises(InvalidTaskError): + Request(body, message=None, app=self.app) + + @patch('celery.worker.job.error') + @patch('celery.worker.job.warn') + def test_on_timeout(self, warn, error): + + job = self.xRequest() + job.on_timeout(soft=True, timeout=1337) + self.assertIn('Soft time limit', warn.call_args[0][0]) + job.on_timeout(soft=False, timeout=1337) + self.assertIn('Hard time limit', error.call_args[0][0]) + self.assertEqual( + self.mytask.backend.get_status(job.id), states.FAILURE, + ) + + self.mytask.ignore_result = True + job = self.xRequest() + job.on_timeout(soft=True, timeout=1336) + self.assertEqual( + self.mytask.backend.get_status(job.id), states.PENDING, + ) + + def test_fast_trace_task(self): + from celery.app import trace + setup_worker_optimizations(self.app) + self.assertIs(trace.trace_task_ret, trace._fast_trace_task) + try: 
+ self.mytask.__trace__ = build_tracer( + self.mytask.name, self.mytask, self.app.loader, 'test', + app=self.app, + ) + res = trace.trace_task_ret(self.mytask.name, uuid(), [4], {}) + self.assertEqual(res, 4 ** 4) + finally: + reset_worker_optimizations() + self.assertIs(trace.trace_task_ret, trace._trace_task_ret) + delattr(self.mytask, '__trace__') + res = trace.trace_task_ret( + self.mytask.name, uuid(), [4], {}, app=self.app, + ) + self.assertEqual(res, 4 ** 4) + + def test_trace_task_ret(self): + self.mytask.__trace__ = build_tracer( + self.mytask.name, self.mytask, self.app.loader, 'test', + app=self.app, + ) + res = _trace_task_ret(self.mytask.name, uuid(), [4], {}, app=self.app) + self.assertEqual(res, 4 ** 4) + + def test_trace_task_ret__no_trace(self): + try: + delattr(self.mytask, '__trace__') + except AttributeError: + pass + res = _trace_task_ret(self.mytask.name, uuid(), [4], {}, app=self.app) + self.assertEqual(res, 4 ** 4) + + def test_trace_catches_exception(self): + + def _error_exec(self, *args, **kwargs): + raise KeyError('baz') + + @self.app.task(request=None, shared=False) + def raising(): + raise KeyError('baz') + + with self.assertWarnsRegex(RuntimeWarning, + r'Exception raised outside'): + res = trace_task(raising, uuid(), [], {}, app=self.app) + self.assertIsInstance(res, ExceptionInfo) + + def test_worker_task_trace_handle_retry(self): + tid = uuid() + self.mytask.push_request(id=tid) + try: + raise ValueError('foo') + except Exception as exc: + try: + raise Retry(str(exc), exc=exc) + except Retry as exc: + w = TraceInfo(states.RETRY, exc) + w.handle_retry(self.mytask, store_errors=False) + self.assertEqual( + self.mytask.backend.get_status(tid), states.PENDING, + ) + w.handle_retry(self.mytask, store_errors=True) + self.assertEqual( + self.mytask.backend.get_status(tid), states.RETRY, + ) + finally: + self.mytask.pop_request() + + def test_worker_task_trace_handle_failure(self): + tid = uuid() + self.mytask.push_request() + try: + self.mytask.request.id = tid + try: + raise ValueError('foo') + except Exception as exc: + w = TraceInfo(states.FAILURE, exc) + w.handle_failure(self.mytask, store_errors=False) + self.assertEqual( + self.mytask.backend.get_status(tid), states.PENDING, + ) + w.handle_failure(self.mytask, store_errors=True) + self.assertEqual( + self.mytask.backend.get_status(tid), states.FAILURE, + ) + finally: + self.mytask.pop_request() + + def test_task_wrapper_mail_attrs(self): + job = self.xRequest({'args': [], 'kwargs': {}}) + x = job.success_msg % { + 'name': job.name, + 'id': job.id, + 'return_value': 10, + 'runtime': 0.3641, + } + self.assertTrue(x) + x = job.error_msg % { + 'name': job.name, + 'id': job.id, + 'exc': 'FOOBARBAZ', + 'description': 'raised unexpected', + 'traceback': 'foobarbaz', + } + self.assertTrue(x) + + def test_from_message(self): + us = 'æØåveéðƒeæ' + body = {'task': self.mytask.name, 'id': uuid(), + 'args': [2], 'kwargs': {us: 'bar'}} + m = Message(None, body=anyjson.dumps(body), backend='foo', + content_type='application/json', + content_encoding='utf-8') + job = Request(m.decode(), message=m, app=self.app) + self.assertIsInstance(job, Request) + self.assertEqual(job.name, body['task']) + self.assertEqual(job.id, body['id']) + self.assertEqual(job.args, body['args']) + us = from_utf8(us) + if sys.version_info < (2, 6): + self.assertEqual(next(keys(job.kwargs)), us) + self.assertIsInstance(next(keys(job.kwargs)), str) + + def test_from_message_empty_args(self): + body = {'task': self.mytask.name, 'id': uuid()} + m = 
Message(None, body=anyjson.dumps(body), backend='foo', + content_type='application/json', + content_encoding='utf-8') + job = Request(m.decode(), message=m, app=self.app) + self.assertIsInstance(job, Request) + self.assertEqual(job.args, []) + self.assertEqual(job.kwargs, {}) + + def test_from_message_missing_required_fields(self): + body = {} + m = Message(None, body=anyjson.dumps(body), backend='foo', + content_type='application/json', + content_encoding='utf-8') + with self.assertRaises(KeyError): + Request(m.decode(), message=m, app=self.app) + + def test_from_message_nonexistant_task(self): + body = {'task': 'cu.mytask.doesnotexist', 'id': uuid(), + 'args': [2], 'kwargs': {'æØåveéðƒeæ': 'bar'}} + m = Message(None, body=anyjson.dumps(body), backend='foo', + content_type='application/json', + content_encoding='utf-8') + with self.assertRaises(KeyError): + Request(m.decode(), message=m, app=self.app) + + def test_execute(self): + tid = uuid() + job = self.xRequest({'id': tid, 'args': [4], 'kwargs': {}}) + self.assertEqual(job.execute(), 256) + meta = self.mytask.backend.get_task_meta(tid) + self.assertEqual(meta['status'], states.SUCCESS) + self.assertEqual(meta['result'], 256) + + def test_execute_success_no_kwargs(self): + + @self.app.task # traverses coverage for decorator without parens + def mytask_no_kwargs(i): + return i ** i + + tid = uuid() + job = self.xRequest({ + 'task': mytask_no_kwargs.name, + 'id': tid, + 'args': [4], + 'kwargs': {}, + }) + self.assertEqual(job.execute(), 256) + meta = mytask_no_kwargs.backend.get_task_meta(tid) + self.assertEqual(meta['result'], 256) + self.assertEqual(meta['status'], states.SUCCESS) + + def test_execute_success_some_kwargs(self): + scratch = {'task_id': None} + + @self.app.task(shared=False, accept_magic_kwargs=True) + def mytask_some_kwargs(i, task_id): + scratch['task_id'] = task_id + return i ** i + + tid = uuid() + job = self.xRequest({ + 'task': mytask_some_kwargs.name, + 'id': tid, + 'args': [4], + 'kwargs': {}, + }) + self.assertEqual(job.execute(), 256) + meta = mytask_some_kwargs.backend.get_task_meta(tid) + self.assertEqual(scratch.get('task_id'), tid) + self.assertEqual(meta['result'], 256) + self.assertEqual(meta['status'], states.SUCCESS) + + def test_execute_ack(self): + scratch = {'ACK': False} + + def on_ack(*args, **kwargs): + scratch['ACK'] = True + + tid = uuid() + job = self.xRequest({'id': tid, 'args': [4]}, on_ack=on_ack) + self.assertEqual(job.execute(), 256) + meta = self.mytask.backend.get_task_meta(tid) + self.assertTrue(scratch['ACK']) + self.assertEqual(meta['result'], 256) + self.assertEqual(meta['status'], states.SUCCESS) + + def test_execute_fail(self): + tid = uuid() + job = self.xRequest({ + 'task': self.mytask_raising.name, + 'id': tid, + 'args': [4], + 'kwargs': {}, + }) + self.assertIsInstance(job.execute(), ExceptionInfo) + meta = self.mytask_raising.backend.get_task_meta(tid) + self.assertEqual(meta['status'], states.FAILURE) + self.assertIsInstance(meta['result'], KeyError) + + def test_execute_using_pool(self): + tid = uuid() + job = self.xRequest({'id': tid, 'args': [4]}) + + class MockPool(BasePool): + target = None + args = None + kwargs = None + + def __init__(self, *args, **kwargs): + pass + + def apply_async(self, target, args=None, kwargs=None, + *margs, **mkwargs): + self.target = target + self.args = args + self.kwargs = kwargs + + p = MockPool() + job.execute_using_pool(p) + self.assertTrue(p.target) + self.assertEqual(p.args[0], self.mytask.name) + self.assertEqual(p.args[1], tid) + 
self.assertEqual(p.args[2], [4]) + self.assertIn('f', p.args[3]) + self.assertIn([4], p.args) + + job.task.accept_magic_kwargs = False + job.execute_using_pool(p) + + def test_default_kwargs(self): + self.maxDiff = 3000 + tid = uuid() + job = self.xRequest({'id': tid, 'args': [4]}) + self.assertDictEqual( + job.extend_with_default_kwargs(), { + 'f': 'x', + 'logfile': None, + 'loglevel': None, + 'task_id': job.id, + 'task_retries': 0, + 'task_is_eager': False, + 'delivery_info': { + 'exchange': None, + 'routing_key': None, + 'priority': 0, + 'redelivered': False, + }, + 'task_name': job.name}) + + @patch('celery.worker.job.logger') + def _test_on_failure(self, exception, logger): + app = self.app + tid = uuid() + job = self.xRequest({'id': tid, 'args': [4]}) + try: + raise exception + except Exception: + exc_info = ExceptionInfo() + app.conf.CELERY_SEND_TASK_ERROR_EMAILS = True + job.on_failure(exc_info) + self.assertTrue(logger.log.called) + context = logger.log.call_args[0][2] + self.assertEqual(self.mytask.name, context['name']) + self.assertIn(tid, context['id']) + + def test_on_failure(self): + self._test_on_failure(Exception('Inside unit tests')) + + def test_on_failure_unicode_exception(self): + self._test_on_failure(Exception('Бобры атакуют')) + + def test_on_failure_utf8_exception(self): + self._test_on_failure(Exception( + from_utf8('Бобры атакуют'))) diff --git a/celery/tests/worker/test_revoke.py b/celery/tests/worker/test_revoke.py new file mode 100644 index 0000000..4d5ad02 --- /dev/null +++ b/celery/tests/worker/test_revoke.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import + +from celery.worker import state +from celery.tests.case import AppCase + + +class test_revoked(AppCase): + + def test_is_working(self): + state.revoked.add('foo') + self.assertIn('foo', state.revoked) + state.revoked.pop_value('foo') + self.assertNotIn('foo', state.revoked) diff --git a/celery/tests/worker/test_state.py b/celery/tests/worker/test_state.py new file mode 100644 index 0000000..ede9a00 --- /dev/null +++ b/celery/tests/worker/test_state.py @@ -0,0 +1,161 @@ +from __future__ import absolute_import + +import pickle + +from time import time + +from celery.datastructures import LimitedSet +from celery.exceptions import WorkerShutdown, WorkerTerminate +from celery.worker import state + +from celery.tests.case import AppCase, Mock, patch + + +class StateResetCase(AppCase): + + def setup(self): + self.reset_state() + + def teardown(self): + self.reset_state() + + def reset_state(self): + state.active_requests.clear() + state.revoked.clear() + state.total_count.clear() + + +class MockShelve(dict): + filename = None + in_sync = False + closed = False + + def open(self, filename, **kwargs): + self.filename = filename + return self + + def sync(self): + self.in_sync = True + + def close(self): + self.closed = True + + +class MyPersistent(state.Persistent): + storage = MockShelve() + + +class test_maybe_shutdown(AppCase): + + def teardown(self): + state.should_stop = False + state.should_terminate = False + + def test_should_stop(self): + state.should_stop = True + with self.assertRaises(WorkerShutdown): + state.maybe_shutdown() + + def test_should_terminate(self): + state.should_terminate = True + with self.assertRaises(WorkerTerminate): + state.maybe_shutdown() + + +class test_Persistent(StateResetCase): + + def setup(self): + self.reset_state() + self.p = MyPersistent(state, filename='celery-state') + + def test_close_twice(self): + self.p._is_open = False + self.p.close() + + def 
test_constructor(self): + self.assertDictEqual(self.p.db, {}) + self.assertEqual(self.p.db.filename, self.p.filename) + + def test_save(self): + self.p.db['foo'] = 'bar' + self.p.save() + self.assertTrue(self.p.db.in_sync) + self.assertTrue(self.p.db.closed) + + def add_revoked(self, *ids): + for id in ids: + self.p.db.setdefault('revoked', LimitedSet()).add(id) + + def test_merge(self, data=['foo', 'bar', 'baz']): + self.add_revoked(*data) + self.p.merge() + for item in data: + self.assertIn(item, state.revoked) + + def test_merge_dict(self): + self.p.clock = Mock() + self.p.clock.adjust.return_value = 626 + d = {'revoked': {'abc': time()}, 'clock': 313} + self.p._merge_with(d) + self.p.clock.adjust.assert_called_with(313) + self.assertEqual(d['clock'], 626) + self.assertIn('abc', state.revoked) + + def test_sync_clock_and_purge(self): + passthrough = Mock() + passthrough.side_effect = lambda x: x + with patch('celery.worker.state.revoked') as revoked: + d = {'clock': 0} + self.p.clock = Mock() + self.p.clock.forward.return_value = 627 + self.p._dumps = passthrough + self.p.compress = passthrough + self.p._sync_with(d) + revoked.purge.assert_called_with() + self.assertEqual(d['clock'], 627) + self.assertNotIn('revoked', d) + self.assertIs(d['zrevoked'], revoked) + + def test_sync(self, data1=['foo', 'bar', 'baz'], + data2=['baz', 'ini', 'koz']): + self.add_revoked(*data1) + for item in data2: + state.revoked.add(item) + self.p.sync() + + self.assertTrue(self.p.db['zrevoked']) + pickled = self.p.decompress(self.p.db['zrevoked']) + self.assertTrue(pickled) + saved = pickle.loads(pickled) + for item in data2: + self.assertIn(item, saved) + + +class SimpleReq(object): + + def __init__(self, name): + self.name = name + + +class test_state(StateResetCase): + + def test_accepted(self, requests=[SimpleReq('foo'), + SimpleReq('bar'), + SimpleReq('baz'), + SimpleReq('baz')]): + for request in requests: + state.task_accepted(request) + for req in requests: + self.assertIn(req, state.active_requests) + self.assertEqual(state.total_count['foo'], 1) + self.assertEqual(state.total_count['bar'], 1) + self.assertEqual(state.total_count['baz'], 2) + + def test_ready(self, requests=[SimpleReq('foo'), + SimpleReq('bar')]): + for request in requests: + state.task_accepted(request) + self.assertEqual(len(state.active_requests), 2) + for request in requests: + state.task_ready(request) + self.assertEqual(len(state.active_requests), 0) diff --git a/celery/tests/worker/test_strategy.py b/celery/tests/worker/test_strategy.py new file mode 100644 index 0000000..7edf78b --- /dev/null +++ b/celery/tests/worker/test_strategy.py @@ -0,0 +1,139 @@ +from __future__ import absolute_import + +from collections import defaultdict +from contextlib import contextmanager + +from kombu.utils.limits import TokenBucket + +from celery.worker import state +from celery.utils.timeutils import rate + +from celery.tests.case import AppCase, Mock, patch, body_from_sig + + +class test_default_strategy(AppCase): + + def setup(self): + @self.app.task(shared=False) + def add(x, y): + return x + y + + self.add = add + + class Context(object): + + def __init__(self, sig, s, reserved, consumer, message, body): + self.sig = sig + self.s = s + self.reserved = reserved + self.consumer = consumer + self.message = message + self.body = body + + def __call__(self, **kwargs): + return self.s( + self.message, self.body, + self.message.ack, self.message.reject, [], **kwargs + ) + + def was_reserved(self): + return self.reserved.called + + def 
was_rate_limited(self): + assert not self.was_reserved() + return self.consumer._limit_task.called + + def was_scheduled(self): + assert not self.was_reserved() + assert not self.was_rate_limited() + return self.consumer.timer.call_at.called + + def event_sent(self): + return self.consumer.event_dispatcher.send.call_args + + def get_request(self): + if self.was_reserved(): + return self.reserved.call_args[0][0] + if self.was_rate_limited(): + return self.consumer._limit_task.call_args[0][0] + if self.was_scheduled(): + return self.consumer.timer.call_at.call_args[0][0] + raise ValueError('request not handled') + + @contextmanager + def _context(self, sig, + rate_limits=True, events=True, utc=True, limit=None): + self.assertTrue(sig.type.Strategy) + + reserved = Mock() + consumer = Mock() + consumer.task_buckets = defaultdict(lambda: None) + if limit: + bucket = TokenBucket(rate(limit), capacity=1) + consumer.task_buckets[sig.task] = bucket + consumer.disable_rate_limits = not rate_limits + consumer.event_dispatcher.enabled = events + s = sig.type.start_strategy(self.app, consumer, task_reserved=reserved) + self.assertTrue(s) + + message = Mock() + body = body_from_sig(self.app, sig, utc=utc) + + yield self.Context(sig, s, reserved, consumer, message, body) + + def test_when_logging_disabled(self): + with patch('celery.worker.strategy.logger') as logger: + logger.isEnabledFor.return_value = False + with self._context(self.add.s(2, 2)) as C: + C() + self.assertFalse(logger.info.called) + + def test_task_strategy(self): + with self._context(self.add.s(2, 2)) as C: + C() + self.assertTrue(C.was_reserved()) + req = C.get_request() + C.consumer.on_task_request.assert_called_with(req) + self.assertTrue(C.event_sent()) + + def test_when_events_disabled(self): + with self._context(self.add.s(2, 2), events=False) as C: + C() + self.assertTrue(C.was_reserved()) + self.assertFalse(C.event_sent()) + + def test_eta_task(self): + with self._context(self.add.s(2, 2).set(countdown=10)) as C: + C() + self.assertTrue(C.was_scheduled()) + C.consumer.qos.increment_eventually.assert_called_with() + + def test_eta_task_utc_disabled(self): + with self._context(self.add.s(2, 2).set(countdown=10), utc=False) as C: + C() + self.assertTrue(C.was_scheduled()) + C.consumer.qos.increment_eventually.assert_called_with() + + def test_when_rate_limited(self): + task = self.add.s(2, 2) + with self._context(task, rate_limits=True, limit='1/m') as C: + C() + self.assertTrue(C.was_rate_limited()) + + def test_when_rate_limited__limits_disabled(self): + task = self.add.s(2, 2) + with self._context(task, rate_limits=False, limit='1/m') as C: + C() + self.assertTrue(C.was_reserved()) + + def test_when_revoked(self): + task = self.add.s(2, 2) + task.freeze() + state.revoked.add(task.id) + try: + with self._context(task) as C: + C() + with self.assertRaises(ValueError): + C.get_request() + finally: + state.revoked.discard(task.id) diff --git a/celery/tests/worker/test_worker.py b/celery/tests/worker/test_worker.py new file mode 100644 index 0000000..27451d0 --- /dev/null +++ b/celery/tests/worker/test_worker.py @@ -0,0 +1,1130 @@ +from __future__ import absolute_import, print_function + +import os +import socket + +from collections import deque +from datetime import datetime, timedelta +from threading import Event + +from amqp import ChannelError +from kombu import Connection +from kombu.common import QoS, ignore_errors +from kombu.transport.base import Message + +from celery.app.defaults import DEFAULTS +from celery.bootsteps 
import RUN, CLOSE, StartStopStep +from celery.concurrency.base import BasePool +from celery.datastructures import AttributeDict +from celery.exceptions import ( + WorkerShutdown, WorkerTerminate, TaskRevokedError, +) +from celery.five import Empty, range, Queue as FastQueue +from celery.utils import uuid +from celery.worker import components +from celery.worker import consumer +from celery.worker.consumer import Consumer as __Consumer +from celery.worker.job import Request +from celery.utils import worker_direct +from celery.utils.serialization import pickle +from celery.utils.timer2 import Timer + +from celery.tests.case import AppCase, Mock, SkipTest, patch, restore_logging + + +def MockStep(step=None): + step = Mock() if step is None else step + step.blueprint = Mock() + step.blueprint.name = 'MockNS' + step.name = 'MockStep(%s)' % (id(step), ) + return step + + +def mock_event_dispatcher(): + evd = Mock(name='event_dispatcher') + evd.groups = ['worker'] + evd._outbound_buffer = deque() + return evd + + +class PlaceHolder(object): + pass + + +def find_step(obj, typ): + return obj.blueprint.steps[typ.name] + + +class Consumer(__Consumer): + + def __init__(self, *args, **kwargs): + kwargs.setdefault('without_mingle', True) # disable Mingle step + kwargs.setdefault('without_gossip', True) # disable Gossip step + kwargs.setdefault('without_heartbeat', True) # disable Heart step + super(Consumer, self).__init__(*args, **kwargs) + + +class _MyKombuConsumer(Consumer): + broadcast_consumer = Mock() + task_consumer = Mock() + + def __init__(self, *args, **kwargs): + kwargs.setdefault('pool', BasePool(2)) + super(_MyKombuConsumer, self).__init__(*args, **kwargs) + + def restart_heartbeat(self): + self.heart = None + + +class MyKombuConsumer(Consumer): + + def loop(self, *args, **kwargs): + pass + + +class MockNode(object): + commands = [] + + def handle_message(self, body, message): + self.commands.append(body.pop('command', None)) + + +class MockEventDispatcher(object): + sent = [] + closed = False + flushed = False + _outbound_buffer = [] + + def send(self, event, *args, **kwargs): + self.sent.append(event) + + def close(self): + self.closed = True + + def flush(self): + self.flushed = True + + +class MockHeart(object): + closed = False + + def stop(self): + self.closed = True + + +def create_message(channel, **data): + data.setdefault('id', uuid()) + channel.no_ack_consumers = set() + m = Message(channel, body=pickle.dumps(dict(**data)), + content_type='application/x-python-serialize', + content_encoding='binary', + delivery_info={'consumer_tag': 'mock'}) + m.accept = ['application/x-python-serialize'] + return m + + +class test_Consumer(AppCase): + + def setup(self): + self.buffer = FastQueue() + self.timer = Timer() + + @self.app.task(shared=False) + def foo_task(x, y, z): + return x * y * z + self.foo_task = foo_task + + def teardown(self): + self.timer.stop() + + def test_info(self): + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.task_consumer = Mock() + l.qos = QoS(l.task_consumer.qos, 10) + l.connection = Mock() + l.connection.info.return_value = {'foo': 'bar'} + l.controller = l.app.WorkController() + l.controller.pool = Mock() + l.controller.pool.info.return_value = [Mock(), Mock()] + l.controller.consumer = l + info = l.controller.stats() + self.assertEqual(info['prefetch_count'], 10) + self.assertTrue(info['broker']) + + def test_start_when_closed(self): + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = CLOSE + 
l.start() + + def test_connection(self): + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + + l.blueprint.start(l) + self.assertIsInstance(l.connection, Connection) + + l.blueprint.state = RUN + l.event_dispatcher = None + l.blueprint.restart(l) + self.assertTrue(l.connection) + + l.blueprint.state = RUN + l.shutdown() + self.assertIsNone(l.connection) + self.assertIsNone(l.task_consumer) + + l.blueprint.start(l) + self.assertIsInstance(l.connection, Connection) + l.blueprint.restart(l) + + l.stop() + l.shutdown() + self.assertIsNone(l.connection) + self.assertIsNone(l.task_consumer) + + def test_close_connection(self): + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = RUN + step = find_step(l, consumer.Connection) + conn = l.connection = Mock() + step.shutdown(l) + self.assertTrue(conn.close.called) + self.assertIsNone(l.connection) + + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + eventer = l.event_dispatcher = mock_event_dispatcher() + eventer.enabled = True + heart = l.heart = MockHeart() + l.blueprint.state = RUN + Events = find_step(l, consumer.Events) + Events.shutdown(l) + Heart = find_step(l, consumer.Heart) + Heart.shutdown(l) + self.assertTrue(eventer.close.call_count) + self.assertTrue(heart.closed) + + @patch('celery.worker.consumer.warn') + def test_receive_message_unknown(self, warn): + l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = RUN + l.steps.pop() + backend = Mock() + m = create_message(backend, unknown={'baz': '!!!'}) + l.event_dispatcher = mock_event_dispatcher() + l.node = MockNode() + + callback = self._get_on_message(l) + callback(m.decode(), m) + self.assertTrue(warn.call_count) + + @patch('celery.worker.strategy.to_timestamp') + def test_receive_message_eta_OverflowError(self, to_timestamp): + to_timestamp.side_effect = OverflowError() + l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = RUN + l.steps.pop() + m = create_message(Mock(), task=self.foo_task.name, + args=('2, 2'), + kwargs={}, + eta=datetime.now().isoformat()) + l.event_dispatcher = mock_event_dispatcher() + l.node = MockNode() + l.update_strategies() + l.qos = Mock() + + callback = self._get_on_message(l) + callback(m.decode(), m) + self.assertTrue(m.acknowledged) + + @patch('celery.worker.consumer.error') + def test_receive_message_InvalidTaskError(self, error): + l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = RUN + l.event_dispatcher = mock_event_dispatcher() + l.steps.pop() + m = create_message(Mock(), task=self.foo_task.name, + args=(1, 2), kwargs='foobarbaz', id=1) + l.update_strategies() + l.event_dispatcher = mock_event_dispatcher() + + callback = self._get_on_message(l) + callback(m.decode(), m) + self.assertIn('Received invalid task message', error.call_args[0][0]) + + @patch('celery.worker.consumer.crit') + def test_on_decode_error(self, crit): + l = Consumer(self.buffer.put, timer=self.timer, app=self.app) + + class MockMessage(Mock): + content_type = 'application/x-msgpack' + content_encoding = 'binary' + body = 'foobarbaz' + + message = MockMessage() + l.on_decode_error(message, KeyError('foo')) + self.assertTrue(message.ack.call_count) + self.assertIn("Can't decode message body", crit.call_args[0][0]) + + def _get_on_message(self, l): + if l.qos is None: + l.qos = Mock() + l.event_dispatcher = mock_event_dispatcher() + l.task_consumer = Mock() + l.connection = Mock() + 
l.connection.drain_events.side_effect = WorkerShutdown() + + with self.assertRaises(WorkerShutdown): + l.loop(*l.loop_args()) + self.assertTrue(l.task_consumer.register_callback.called) + return l.task_consumer.register_callback.call_args[0][0] + + def test_receieve_message(self): + l = Consumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = RUN + l.event_dispatcher = mock_event_dispatcher() + m = create_message(Mock(), task=self.foo_task.name, + args=[2, 4, 8], kwargs={}) + l.update_strategies() + callback = self._get_on_message(l) + callback(m.decode(), m) + + in_bucket = self.buffer.get_nowait() + self.assertIsInstance(in_bucket, Request) + self.assertEqual(in_bucket.name, self.foo_task.name) + self.assertEqual(in_bucket.execute(), 2 * 4 * 8) + self.assertTrue(self.timer.empty()) + + def test_start_channel_error(self): + + class MockConsumer(Consumer): + iterations = 0 + + def loop(self, *args, **kwargs): + if not self.iterations: + self.iterations = 1 + raise KeyError('foo') + raise SyntaxError('bar') + + l = MockConsumer(self.buffer.put, timer=self.timer, + send_events=False, pool=BasePool(), app=self.app) + l.channel_errors = (KeyError, ) + with self.assertRaises(KeyError): + l.start() + l.timer.stop() + + def test_start_connection_error(self): + + class MockConsumer(Consumer): + iterations = 0 + + def loop(self, *args, **kwargs): + if not self.iterations: + self.iterations = 1 + raise KeyError('foo') + raise SyntaxError('bar') + + l = MockConsumer(self.buffer.put, timer=self.timer, + send_events=False, pool=BasePool(), app=self.app) + + l.connection_errors = (KeyError, ) + self.assertRaises(SyntaxError, l.start) + l.timer.stop() + + def test_loop_ignores_socket_timeout(self): + + class Connection(self.app.connection().__class__): + obj = None + + def drain_events(self, **kwargs): + self.obj.connection = None + raise socket.timeout(10) + + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.connection = Connection() + l.task_consumer = Mock() + l.connection.obj = l + l.qos = QoS(l.task_consumer.qos, 10) + l.loop(*l.loop_args()) + + def test_loop_when_socket_error(self): + + class Connection(self.app.connection().__class__): + obj = None + + def drain_events(self, **kwargs): + self.obj.connection = None + raise socket.error('foo') + + l = Consumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = RUN + c = l.connection = Connection() + l.connection.obj = l + l.task_consumer = Mock() + l.qos = QoS(l.task_consumer.qos, 10) + with self.assertRaises(socket.error): + l.loop(*l.loop_args()) + + l.blueprint.state = CLOSE + l.connection = c + l.loop(*l.loop_args()) + + def test_loop(self): + + class Connection(self.app.connection().__class__): + obj = None + + def drain_events(self, **kwargs): + self.obj.connection = None + + l = Consumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = RUN + l.connection = Connection() + l.connection.obj = l + l.task_consumer = Mock() + l.qos = QoS(l.task_consumer.qos, 10) + + l.loop(*l.loop_args()) + l.loop(*l.loop_args()) + self.assertTrue(l.task_consumer.consume.call_count) + l.task_consumer.qos.assert_called_with(prefetch_count=10) + self.assertEqual(l.qos.value, 10) + l.qos.decrement_eventually() + self.assertEqual(l.qos.value, 9) + l.qos.update() + self.assertEqual(l.qos.value, 9) + l.task_consumer.qos.assert_called_with(prefetch_count=9) + + def test_ignore_errors(self): + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.connection_errors = 
(AttributeError, KeyError, ) + l.channel_errors = (SyntaxError, ) + ignore_errors(l, Mock(side_effect=AttributeError('foo'))) + ignore_errors(l, Mock(side_effect=KeyError('foo'))) + ignore_errors(l, Mock(side_effect=SyntaxError('foo'))) + with self.assertRaises(IndexError): + ignore_errors(l, Mock(side_effect=IndexError('foo'))) + + def test_apply_eta_task(self): + from celery.worker import state + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.qos = QoS(None, 10) + + task = object() + qos = l.qos.value + l.apply_eta_task(task) + self.assertIn(task, state.reserved_requests) + self.assertEqual(l.qos.value, qos - 1) + self.assertIs(self.buffer.get_nowait(), task) + + def test_receieve_message_eta_isoformat(self): + raise SkipTest('broken test, may fail at random') + l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = RUN + l.steps.pop() + m = create_message( + Mock(), task=self.foo_task.name, + eta=(datetime.now() + timedelta(days=1)).isoformat(), + args=[2, 4, 8], kwargs={}, + ) + + l.task_consumer = Mock() + l.qos = QoS(l.task_consumer.qos, 1) + current_pcount = l.qos.value + l.event_dispatcher = mock_event_dispatcher() + l.enabled = False + l.update_strategies() + callback = self._get_on_message(l) + callback(m.decode(), m) + l.timer.stop() + l.timer.join(1) + + items = [entry[2] for entry in self.timer.queue] + found = 0 + for item in items: + if item.args[0].name == self.foo_task.name: + found = True + self.assertTrue(found) + self.assertGreater(l.qos.value, current_pcount) + l.timer.stop() + + def test_pidbox_callback(self): + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + con = find_step(l, consumer.Control).box + con.node = Mock() + con.reset = Mock() + + con.on_message('foo', 'bar') + con.node.handle_message.assert_called_with('foo', 'bar') + + con.node = Mock() + con.node.handle_message.side_effect = KeyError('foo') + con.on_message('foo', 'bar') + con.node.handle_message.assert_called_with('foo', 'bar') + + con.node = Mock() + con.node.handle_message.side_effect = ValueError('foo') + con.on_message('foo', 'bar') + con.node.handle_message.assert_called_with('foo', 'bar') + self.assertTrue(con.reset.called) + + def test_revoke(self): + l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = RUN + l.steps.pop() + backend = Mock() + id = uuid() + t = create_message(backend, task=self.foo_task.name, args=[2, 4, 8], + kwargs={}, id=id) + from celery.worker.state import revoked + revoked.add(id) + + callback = self._get_on_message(l) + callback(t.decode(), t) + self.assertTrue(self.buffer.empty()) + + def test_receieve_message_not_registered(self): + l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = RUN + l.steps.pop() + backend = Mock() + m = create_message(backend, task='x.X.31x', args=[2, 4, 8], kwargs={}) + + l.event_dispatcher = mock_event_dispatcher() + callback = self._get_on_message(l) + self.assertFalse(callback(m.decode(), m)) + with self.assertRaises(Empty): + self.buffer.get_nowait() + self.assertTrue(self.timer.empty()) + + @patch('celery.worker.consumer.warn') + @patch('celery.worker.consumer.logger') + def test_receieve_message_ack_raises(self, logger, warn): + l = Consumer(self.buffer.put, timer=self.timer, app=self.app) + l.blueprint.state = RUN + backend = Mock() + m = create_message(backend, args=[2, 4, 8], kwargs={}) + + l.event_dispatcher = mock_event_dispatcher() + l.connection_errors = (socket.error, 
) + m.reject = Mock() + m.reject.side_effect = socket.error('foo') + callback = self._get_on_message(l) + self.assertFalse(callback(m.decode(), m)) + self.assertTrue(warn.call_count) + with self.assertRaises(Empty): + self.buffer.get_nowait() + self.assertTrue(self.timer.empty()) + m.reject.assert_called_with(requeue=False) + self.assertTrue(logger.critical.call_count) + + def test_receive_message_eta(self): + raise SkipTest('broken test, may fail at random') + import sys + from functools import partial + if os.environ.get('C_DEBUG_TEST'): + pp = partial(print, file=sys.__stderr__) + else: + def pp(*args, **kwargs): + pass + pp('TEST RECEIVE MESSAGE ETA') + pp('+CREATE MYKOMBUCONSUMER') + l = _MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + pp('-CREATE MYKOMBUCONSUMER') + l.steps.pop() + l.event_dispatcher = mock_event_dispatcher() + backend = Mock() + pp('+ CREATE MESSAGE') + m = create_message( + backend, task=self.foo_task.name, + args=[2, 4, 8], kwargs={}, + eta=(datetime.now() + timedelta(days=1)).isoformat(), + ) + pp('- CREATE MESSAGE') + + try: + pp('+ BLUEPRINT START 1') + l.blueprint.start(l) + pp('- BLUEPRINT START 1') + p = l.app.conf.BROKER_CONNECTION_RETRY + l.app.conf.BROKER_CONNECTION_RETRY = False + pp('+ BLUEPRINT START 2') + l.blueprint.start(l) + pp('- BLUEPRINT START 2') + l.app.conf.BROKER_CONNECTION_RETRY = p + pp('+ BLUEPRINT RESTART') + l.blueprint.restart(l) + pp('- BLUEPRINT RESTART') + l.event_dispatcher = mock_event_dispatcher() + pp('+ GET ON MESSAGE') + callback = self._get_on_message(l) + pp('- GET ON MESSAGE') + pp('+ CALLBACK') + callback(m.decode(), m) + pp('- CALLBACK') + finally: + pp('+ STOP TIMER') + l.timer.stop() + pp('- STOP TIMER') + try: + pp('+ JOIN TIMER') + l.timer.join() + pp('- JOIN TIMER') + except RuntimeError: + pass + + in_hold = l.timer.queue[0] + self.assertEqual(len(in_hold), 3) + eta, priority, entry = in_hold + task = entry.args[0] + self.assertIsInstance(task, Request) + self.assertEqual(task.name, self.foo_task.name) + self.assertEqual(task.execute(), 2 * 4 * 8) + with self.assertRaises(Empty): + self.buffer.get_nowait() + + def test_reset_pidbox_node(self): + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + con = find_step(l, consumer.Control).box + con.node = Mock() + chan = con.node.channel = Mock() + l.connection = Mock() + chan.close.side_effect = socket.error('foo') + l.connection_errors = (socket.error, ) + con.reset() + chan.close.assert_called_with() + + def test_reset_pidbox_node_green(self): + from celery.worker.pidbox import gPidbox + pool = Mock() + pool.is_green = True + l = MyKombuConsumer(self.buffer.put, timer=self.timer, pool=pool, + app=self.app) + con = find_step(l, consumer.Control) + self.assertIsInstance(con.box, gPidbox) + con.start(l) + l.pool.spawn_n.assert_called_with( + con.box.loop, l, + ) + + def test__green_pidbox_node(self): + pool = Mock() + pool.is_green = True + l = MyKombuConsumer(self.buffer.put, timer=self.timer, pool=pool, + app=self.app) + l.node = Mock() + controller = find_step(l, consumer.Control) + + class BConsumer(Mock): + + def __enter__(self): + self.consume() + return self + + def __exit__(self, *exc_info): + self.cancel() + + controller.box.node.listen = BConsumer() + connections = [] + + class Connection(object): + calls = 0 + + def __init__(self, obj): + connections.append(self) + self.obj = obj + self.default_channel = self.channel() + self.closed = False + + def __enter__(self): + return self + + def __exit__(self, *exc_info): + 
self.close() + + def channel(self): + return Mock() + + def as_uri(self): + return 'dummy://' + + def drain_events(self, **kwargs): + if not self.calls: + self.calls += 1 + raise socket.timeout() + self.obj.connection = None + controller.box._node_shutdown.set() + + def close(self): + self.closed = True + + l.connection = Mock() + l.connect = lambda: Connection(obj=l) + controller = find_step(l, consumer.Control) + controller.box.loop(l) + + self.assertTrue(controller.box.node.listen.called) + self.assertTrue(controller.box.consumer) + controller.box.consumer.consume.assert_called_with() + + self.assertIsNone(l.connection) + self.assertTrue(connections[0].closed) + + @patch('kombu.connection.Connection._establish_connection') + @patch('kombu.utils.sleep') + def test_connect_errback(self, sleep, connect): + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + from kombu.transport.memory import Transport + Transport.connection_errors = (ChannelError, ) + + def effect(): + if connect.call_count > 1: + return + raise ChannelError('error') + connect.side_effect = effect + l.connect() + connect.assert_called_with() + + def test_stop_pidbox_node(self): + l = MyKombuConsumer(self.buffer.put, timer=self.timer, app=self.app) + cont = find_step(l, consumer.Control) + cont._node_stopped = Event() + cont._node_shutdown = Event() + cont._node_stopped.set() + cont.stop(l) + + def test_start__loop(self): + + class _QoS(object): + prev = 3 + value = 4 + + def update(self): + self.prev = self.value + + class _Consumer(MyKombuConsumer): + iterations = 0 + + def reset_connection(self): + if self.iterations >= 1: + raise KeyError('foo') + + init_callback = Mock() + l = _Consumer(self.buffer.put, timer=self.timer, + init_callback=init_callback, app=self.app) + l.task_consumer = Mock() + l.broadcast_consumer = Mock() + l.qos = _QoS() + l.connection = Connection() + l.iterations = 0 + + def raises_KeyError(*args, **kwargs): + l.iterations += 1 + if l.qos.prev != l.qos.value: + l.qos.update() + if l.iterations >= 2: + raise KeyError('foo') + + l.loop = raises_KeyError + with self.assertRaises(KeyError): + l.start() + self.assertEqual(l.iterations, 2) + self.assertEqual(l.qos.prev, l.qos.value) + + init_callback.reset_mock() + l = _Consumer(self.buffer.put, timer=self.timer, app=self.app, + send_events=False, init_callback=init_callback) + l.qos = _QoS() + l.task_consumer = Mock() + l.broadcast_consumer = Mock() + l.connection = Connection() + l.loop = Mock(side_effect=socket.error('foo')) + with self.assertRaises(socket.error): + l.start() + self.assertTrue(l.loop.call_count) + + def test_reset_connection_with_no_node(self): + l = Consumer(self.buffer.put, timer=self.timer, app=self.app) + l.steps.pop() + self.assertEqual(None, l.pool) + l.blueprint.start(l) + + +class test_WorkController(AppCase): + + def setup(self): + self.worker = self.create_worker() + from celery import worker + self._logger = worker.logger + self._comp_logger = components.logger + self.logger = worker.logger = Mock() + self.comp_logger = components.logger = Mock() + + @self.app.task(shared=False) + def foo_task(x, y, z): + return x * y * z + self.foo_task = foo_task + + def teardown(self): + from celery import worker + worker.logger = self._logger + components.logger = self._comp_logger + + def create_worker(self, **kw): + worker = self.app.WorkController(concurrency=1, loglevel=0, **kw) + worker.blueprint.shutdown_complete.set() + return worker + + def test_on_consumer_ready(self): + self.worker.on_consumer_ready(Mock()) 
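# An illustrative sketch of the helper exercised by the next test: as defined
# in celery/utils/__init__.py further below, worker_direct() maps a node name
# to its direct-routing queue roughly like this:
#
#     from celery.utils import worker_direct
#
#     q = worker_direct('w1@example.com')
#     q.name             # 'w1@example.com.dq'
#     q.exchange.name    # 'C.dq'
#     q.routing_key      # 'w1@example.com'
#     q.auto_delete      # True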
+ + def test_setup_queues_worker_direct(self): + self.app.conf.CELERY_WORKER_DIRECT = True + self.app.amqp.__dict__['queues'] = Mock() + self.worker.setup_queues({}) + self.app.amqp.queues.select_add.assert_called_with( + worker_direct(self.worker.hostname), + ) + + def test_send_worker_shutdown(self): + with patch('celery.signals.worker_shutdown') as ws: + self.worker._send_worker_shutdown() + ws.send.assert_called_with(sender=self.worker) + + def test_process_shutdown_on_worker_shutdown(self): + raise SkipTest('unstable test') + from celery.concurrency.prefork import process_destructor + from celery.concurrency.asynpool import Worker + with patch('celery.signals.worker_process_shutdown') as ws: + Worker._make_shortcuts = Mock() + with patch('os._exit') as _exit: + worker = Worker(None, None, on_exit=process_destructor) + worker._do_exit(22, 3.1415926) + ws.send.assert_called_with( + sender=None, pid=22, exitcode=3.1415926, + ) + _exit.assert_called_with(3.1415926) + + def test_process_task_revoked_release_semaphore(self): + self.worker._quick_release = Mock() + req = Mock() + req.execute_using_pool.side_effect = TaskRevokedError + self.worker._process_task(req) + self.worker._quick_release.assert_called_with() + + delattr(self.worker, '_quick_release') + self.worker._process_task(req) + + def test_shutdown_no_blueprint(self): + self.worker.blueprint = None + self.worker._shutdown() + + @patch('celery.platforms.create_pidlock') + def test_use_pidfile(self, create_pidlock): + create_pidlock.return_value = Mock() + worker = self.create_worker(pidfile='pidfilelockfilepid') + worker.steps = [] + worker.start() + self.assertTrue(create_pidlock.called) + worker.stop() + self.assertTrue(worker.pidlock.release.called) + + @patch('celery.platforms.signals') + @patch('celery.platforms.set_mp_process_title') + def test_process_initializer(self, set_mp_process_title, _signals): + with restore_logging(): + from celery import signals + from celery._state import _tls + from celery.concurrency.prefork import ( + process_initializer, WORKER_SIGRESET, WORKER_SIGIGNORE, + ) + + def on_worker_process_init(**kwargs): + on_worker_process_init.called = True + on_worker_process_init.called = False + signals.worker_process_init.connect(on_worker_process_init) + + def Loader(*args, **kwargs): + loader = Mock(*args, **kwargs) + loader.conf = {} + loader.override_backends = {} + return loader + + with self.Celery(loader=Loader) as app: + app.conf = AttributeDict(DEFAULTS) + process_initializer(app, 'awesome.worker.com') + _signals.ignore.assert_any_call(*WORKER_SIGIGNORE) + _signals.reset.assert_any_call(*WORKER_SIGRESET) + self.assertTrue(app.loader.init_worker.call_count) + self.assertTrue(on_worker_process_init.called) + self.assertIs(_tls.current_app, app) + set_mp_process_title.assert_called_with( + 'celeryd', hostname='awesome.worker.com', + ) + + with patch('celery.app.trace.setup_worker_optimizations') as S: + os.environ['FORKED_BY_MULTIPROCESSING'] = "1" + try: + process_initializer(app, 'luke.worker.com') + S.assert_called_with(app) + finally: + os.environ.pop('FORKED_BY_MULTIPROCESSING', None) + + def test_attrs(self): + worker = self.worker + self.assertIsNotNone(worker.timer) + self.assertIsInstance(worker.timer, Timer) + self.assertIsNotNone(worker.pool) + self.assertIsNotNone(worker.consumer) + self.assertTrue(worker.steps) + + def test_with_embedded_beat(self): + worker = self.app.WorkController(concurrency=1, loglevel=0, beat=True) + self.assertTrue(worker.beat) + self.assertIn(worker.beat, [w.obj 
for w in worker.steps]) + + def test_with_autoscaler(self): + worker = self.create_worker( + autoscale=[10, 3], send_events=False, + timer_cls='celery.utils.timer2.Timer', + ) + self.assertTrue(worker.autoscaler) + + def test_dont_stop_or_terminate(self): + worker = self.app.WorkController(concurrency=1, loglevel=0) + worker.stop() + self.assertNotEqual(worker.blueprint.state, CLOSE) + worker.terminate() + self.assertNotEqual(worker.blueprint.state, CLOSE) + + sigsafe, worker.pool.signal_safe = worker.pool.signal_safe, False + try: + worker.blueprint.state = RUN + worker.stop(in_sighandler=True) + self.assertNotEqual(worker.blueprint.state, CLOSE) + worker.terminate(in_sighandler=True) + self.assertNotEqual(worker.blueprint.state, CLOSE) + finally: + worker.pool.signal_safe = sigsafe + + def test_on_timer_error(self): + worker = self.app.WorkController(concurrency=1, loglevel=0) + + try: + raise KeyError('foo') + except KeyError as exc: + components.Timer(worker).on_timer_error(exc) + msg, args = self.comp_logger.error.call_args[0] + self.assertIn('KeyError', msg % args) + + def test_on_timer_tick(self): + worker = self.app.WorkController(concurrency=1, loglevel=10) + + components.Timer(worker).on_timer_tick(30.0) + xargs = self.comp_logger.debug.call_args[0] + fmt, arg = xargs[0], xargs[1] + self.assertEqual(30.0, arg) + self.assertIn('Next eta %s secs', fmt) + + def test_process_task(self): + worker = self.worker + worker.pool = Mock() + backend = Mock() + m = create_message(backend, task=self.foo_task.name, args=[4, 8, 10], + kwargs={}) + task = Request(m.decode(), message=m, app=self.app) + worker._process_task(task) + self.assertEqual(worker.pool.apply_async.call_count, 1) + worker.pool.stop() + + def test_process_task_raise_base(self): + worker = self.worker + worker.pool = Mock() + worker.pool.apply_async.side_effect = KeyboardInterrupt('Ctrl+C') + backend = Mock() + m = create_message(backend, task=self.foo_task.name, args=[4, 8, 10], + kwargs={}) + task = Request(m.decode(), message=m, app=self.app) + worker.steps = [] + worker.blueprint.state = RUN + with self.assertRaises(KeyboardInterrupt): + worker._process_task(task) + + def test_process_task_raise_WorkerTerminate(self): + worker = self.worker + worker.pool = Mock() + worker.pool.apply_async.side_effect = WorkerTerminate() + backend = Mock() + m = create_message(backend, task=self.foo_task.name, args=[4, 8, 10], + kwargs={}) + task = Request(m.decode(), message=m, app=self.app) + worker.steps = [] + worker.blueprint.state = RUN + with self.assertRaises(SystemExit): + worker._process_task(task) + + def test_process_task_raise_regular(self): + worker = self.worker + worker.pool = Mock() + worker.pool.apply_async.side_effect = KeyError('some exception') + backend = Mock() + m = create_message(backend, task=self.foo_task.name, args=[4, 8, 10], + kwargs={}) + task = Request(m.decode(), message=m, app=self.app) + worker._process_task(task) + worker.pool.stop() + + def test_start_catches_base_exceptions(self): + worker1 = self.create_worker() + worker1.blueprint.state = RUN + stc = MockStep() + stc.start.side_effect = WorkerTerminate() + worker1.steps = [stc] + worker1.start() + stc.start.assert_called_with(worker1) + self.assertTrue(stc.terminate.call_count) + + worker2 = self.create_worker() + worker2.blueprint.state = RUN + sec = MockStep() + sec.start.side_effect = WorkerShutdown() + sec.terminate = None + worker2.steps = [sec] + worker2.start() + self.assertTrue(sec.stop.call_count) + + def test_state_db(self): + from 
celery.worker import state + Persistent = state.Persistent + + state.Persistent = Mock() + try: + worker = self.create_worker(state_db='statefilename') + self.assertTrue(worker._persistence) + finally: + state.Persistent = Persistent + + def test_process_task_sem(self): + worker = self.worker + worker._quick_acquire = Mock() + + req = Mock() + worker._process_task_sem(req) + worker._quick_acquire.assert_called_with(worker._process_task, req) + + def test_signal_consumer_close(self): + worker = self.worker + worker.consumer = Mock() + + worker.signal_consumer_close() + worker.consumer.close.assert_called_with() + + worker.consumer.close.side_effect = AttributeError() + worker.signal_consumer_close() + + def test_start__stop(self): + worker = self.worker + worker.blueprint.shutdown_complete.set() + worker.steps = [MockStep(StartStopStep(self)) for _ in range(4)] + worker.blueprint.state = RUN + worker.blueprint.started = 4 + for w in worker.steps: + w.start = Mock() + w.close = Mock() + w.stop = Mock() + + worker.start() + for w in worker.steps: + self.assertTrue(w.start.call_count) + worker.consumer = Mock() + worker.stop() + for stopstep in worker.steps: + self.assertTrue(stopstep.close.call_count) + self.assertTrue(stopstep.stop.call_count) + + # Doesn't close pool if no pool. + worker.start() + worker.pool = None + worker.stop() + + # test that stop of None is not attempted + worker.steps[-1] = None + worker.start() + worker.stop() + + def test_step_raises(self): + worker = self.worker + step = Mock() + worker.steps = [step] + step.start.side_effect = TypeError() + worker.stop = Mock() + worker.start() + worker.stop.assert_called_with() + + def test_state(self): + self.assertTrue(self.worker.state) + + def test_start__terminate(self): + worker = self.worker + worker.blueprint.shutdown_complete.set() + worker.blueprint.started = 5 + worker.blueprint.state = RUN + worker.steps = [MockStep() for _ in range(5)] + worker.start() + for w in worker.steps[:3]: + self.assertTrue(w.start.call_count) + self.assertTrue(worker.blueprint.started, len(worker.steps)) + self.assertEqual(worker.blueprint.state, RUN) + worker.terminate() + for step in worker.steps: + self.assertTrue(step.terminate.call_count) + + def test_Queues_pool_no_sem(self): + w = Mock() + w.pool_cls.uses_semaphore = False + components.Queues(w).create(w) + self.assertIs(w.process_task, w._process_task) + + def test_Hub_crate(self): + w = Mock() + x = components.Hub(w) + x.create(w) + self.assertTrue(w.timer.max_interval) + + def test_Pool_crate_threaded(self): + w = Mock() + w._conninfo.connection_errors = w._conninfo.channel_errors = () + w.pool_cls = Mock() + w.use_eventloop = False + pool = components.Pool(w) + pool.create(w) + + def test_Pool_create(self): + from kombu.async.semaphore import LaxBoundedSemaphore + w = Mock() + w._conninfo.connection_errors = w._conninfo.channel_errors = () + w.hub = Mock() + + PoolImp = Mock() + poolimp = PoolImp.return_value = Mock() + poolimp._pool = [Mock(), Mock()] + poolimp._cache = {} + poolimp._fileno_to_inq = {} + poolimp._fileno_to_outq = {} + + from celery.concurrency.prefork import TaskPool as _TaskPool + + class MockTaskPool(_TaskPool): + Pool = PoolImp + + @property + def timers(self): + return {Mock(): 30} + + w.pool_cls = MockTaskPool + w.use_eventloop = True + w.consumer.restart_count = -1 + pool = components.Pool(w) + pool.create(w) + pool.register_with_event_loop(w, w.hub) + self.assertIsInstance(w.semaphore, LaxBoundedSemaphore) + P = w.pool + P.start() diff --git 
a/celery/utils/__init__.py b/celery/utils/__init__.py new file mode 100644 index 0000000..2420509 --- /dev/null +++ b/celery/utils/__init__.py @@ -0,0 +1,397 @@ +# -*- coding: utf-8 -*- +""" + celery.utils + ~~~~~~~~~~~~ + + Utility functions. + +""" +from __future__ import absolute_import, print_function + +import numbers +import os +import re +import socket +import sys +import traceback +import warnings +import datetime + +from collections import Callable +from functools import partial, wraps +from inspect import getargspec +from pprint import pprint + +from kombu.entity import Exchange, Queue + +from celery.exceptions import CPendingDeprecationWarning, CDeprecationWarning +from celery.five import WhateverIO, items, reraise, string_t + +__all__ = ['worker_direct', 'warn_deprecated', 'deprecated', 'lpmerge', + 'is_iterable', 'isatty', 'cry', 'maybe_reraise', 'strtobool', + 'jsonify', 'gen_task_name', 'nodename', 'nodesplit', + 'cached_property'] + +PY3 = sys.version_info[0] == 3 + + +PENDING_DEPRECATION_FMT = """ + {description} is scheduled for deprecation in \ + version {deprecation} and removal in version v{removal}. \ + {alternative} +""" + +DEPRECATION_FMT = """ + {description} is deprecated and scheduled for removal in + version {removal}. {alternative} +""" + +#: Billiard sets this when execv is enabled. +#: We use it to find out the name of the original ``__main__`` +#: module, so that we can properly rewrite the name of the +#: task to be that of ``App.main``. +MP_MAIN_FILE = os.environ.get('MP_MAIN_FILE') or None + +#: Exchange for worker direct queues. +WORKER_DIRECT_EXCHANGE = Exchange('C.dq') + +#: Format for worker direct queue names. +WORKER_DIRECT_QUEUE_FORMAT = '{hostname}.dq' + +#: Separator for worker node name and hostname. +NODENAME_SEP = '@' + +NODENAME_DEFAULT = 'celery' +RE_FORMAT = re.compile(r'%(\w)') + + +def worker_direct(hostname): + """Return :class:`kombu.Queue` that is a direct route to + a worker by hostname. + + :param hostname: The fully qualified node name of a worker + (e.g. ``w1@example.com``). If passed a + :class:`kombu.Queue` instance it will simply return + that instead. + """ + if isinstance(hostname, Queue): + return hostname + return Queue(WORKER_DIRECT_QUEUE_FORMAT.format(hostname=hostname), + WORKER_DIRECT_EXCHANGE, + hostname, auto_delete=True) + + +def warn_deprecated(description=None, deprecation=None, + removal=None, alternative=None, stacklevel=2): + ctx = {'description': description, + 'deprecation': deprecation, 'removal': removal, + 'alternative': alternative} + if deprecation is not None: + w = CPendingDeprecationWarning(PENDING_DEPRECATION_FMT.format(**ctx)) + else: + w = CDeprecationWarning(DEPRECATION_FMT.format(**ctx)) + warnings.warn(w, stacklevel=stacklevel) + + +def deprecated(deprecation=None, removal=None, + alternative=None, description=None): + """Decorator for deprecated functions. + + A deprecation warning will be emitted when the function is called. + + :keyword deprecation: Version that marks first deprecation, if this + argument is not set a ``PendingDeprecationWarning`` will be emitted + instead. + :keyword removal: Future version when this feature will be removed. + :keyword alternative: Instructions for an alternative solution (if any). + :keyword description: Description of what is being deprecated. 
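    Example (an illustrative sketch; ``old_add`` is a hypothetical function
    and the version numbers are made up for the example)::

        >>> @deprecated(deprecation='3.1', removal='4.0',
        ...             alternative='Use add() instead.')
        ... def old_add(x, y):
        ...     return x + y

        >>> old_add(2, 2)   # warns via warn_deprecated(), then calls through
        4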
+ + """ + def _inner(fun): + + @wraps(fun) + def __inner(*args, **kwargs): + from .imports import qualname + warn_deprecated(description=description or qualname(fun), + deprecation=deprecation, + removal=removal, + alternative=alternative, + stacklevel=3) + return fun(*args, **kwargs) + return __inner + return _inner + + +def deprecated_property(deprecation=None, removal=None, + alternative=None, description=None): + def _inner(fun): + return _deprecated_property( + fun, deprecation=deprecation, removal=removal, + alternative=alternative, description=description or fun.__name__) + return _inner + + +class _deprecated_property(object): + + def __init__(self, fget=None, fset=None, fdel=None, doc=None, **depreinfo): + self.__get = fget + self.__set = fset + self.__del = fdel + self.__name__, self.__module__, self.__doc__ = ( + fget.__name__, fget.__module__, fget.__doc__, + ) + self.depreinfo = depreinfo + self.depreinfo.setdefault('stacklevel', 3) + + def __get__(self, obj, type=None): + if obj is None: + return self + warn_deprecated(**self.depreinfo) + return self.__get(obj) + + def __set__(self, obj, value): + if obj is None: + return self + if self.__set is None: + raise AttributeError('cannot set attribute') + warn_deprecated(**self.depreinfo) + self.__set(obj, value) + + def __delete__(self, obj): + if obj is None: + return self + if self.__del is None: + raise AttributeError('cannot delete attribute') + warn_deprecated(**self.depreinfo) + self.__del(obj) + + def setter(self, fset): + return self.__class__(self.__get, fset, self.__del, **self.depreinfo) + + def deleter(self, fdel): + return self.__class__(self.__get, self.__set, fdel, **self.depreinfo) + + +def lpmerge(L, R): + """In place left precedent dictionary merge. + + Keeps values from `L`, if the value in `R` is :const:`None`.""" + set = L.__setitem__ + [set(k, v) for k, v in items(R) if v is not None] + return L + + +def is_iterable(obj): + try: + iter(obj) + except TypeError: + return False + return True + + +def fun_takes_kwargs(fun, kwlist=[]): + # deprecated + S = getattr(fun, 'argspec', getargspec(fun)) + if S.keywords is not None: + return kwlist + return [kw for kw in kwlist if kw in S.args] + + +def isatty(fh): + try: + return fh.isatty() + except AttributeError: + pass + + +def cry(out=None, sepchr='=', seplen=49): # pragma: no cover + """Return stacktrace of all active threads, + taken from https://gist.github.com/737056.""" + import threading + + out = WhateverIO() if out is None else out + P = partial(print, file=out) + + # get a map of threads by their ID so we can print their names + # during the traceback dump + tmap = dict((t.ident, t) for t in threading.enumerate()) + + sep = sepchr * seplen + for tid, frame in items(sys._current_frames()): + thread = tmap.get(tid) + if not thread: + # skip old junk (left-overs from a fork) + continue + P('{0.name}'.format(thread)) + P(sep) + traceback.print_stack(frame, file=out) + P(sep) + P('LOCAL VARIABLES') + P(sep) + pprint(frame.f_locals, stream=out) + P('\n') + return out.getvalue() + + +def maybe_reraise(): + """Re-raise if an exception is currently being handled, or return + otherwise.""" + exc_info = sys.exc_info() + try: + if exc_info[2]: + reraise(exc_info[0], exc_info[1], exc_info[2]) + finally: + # see http://docs.python.org/library/sys.html#sys.exc_info + del(exc_info) + + +def strtobool(term, table={'false': False, 'no': False, '0': False, + 'true': True, 'yes': True, '1': True, + 'on': True, 'off': False}): + """Convert common terms for true/false to bool + 
(true/false/yes/no/on/off/1/0).""" + if isinstance(term, string_t): + try: + return table[term.lower()] + except KeyError: + raise TypeError('Cannot coerce {0!r} to type bool'.format(term)) + return term + + +def jsonify(obj, + builtin_types=(numbers.Real, string_t), key=None, + keyfilter=None, + unknown_type_filter=None): + """Transforms object making it suitable for json serialization""" + from kombu.abstract import Object as KombuDictType + _jsonify = partial(jsonify, builtin_types=builtin_types, key=key, + keyfilter=keyfilter, + unknown_type_filter=unknown_type_filter) + + if isinstance(obj, KombuDictType): + obj = obj.as_dict(recurse=True) + + if obj is None or isinstance(obj, builtin_types): + return obj + elif isinstance(obj, (tuple, list)): + return [_jsonify(v) for v in obj] + elif isinstance(obj, dict): + return dict((k, _jsonify(v, key=k)) + for k, v in items(obj) + if (keyfilter(k) if keyfilter else 1)) + elif isinstance(obj, datetime.datetime): + # See "Date Time String Format" in the ECMA-262 specification. + r = obj.isoformat() + if obj.microsecond: + r = r[:23] + r[26:] + if r.endswith('+00:00'): + r = r[:-6] + 'Z' + return r + elif isinstance(obj, datetime.date): + return obj.isoformat() + elif isinstance(obj, datetime.time): + r = obj.isoformat() + if obj.microsecond: + r = r[:12] + return r + elif isinstance(obj, datetime.timedelta): + return str(obj) + else: + if unknown_type_filter is None: + raise ValueError( + 'Unsupported type: {0!r} {1!r} (parent: {2})'.format( + type(obj), obj, key)) + return unknown_type_filter(obj) + + +def gen_task_name(app, name, module_name): + """Generate task name from name/module pair.""" + try: + module = sys.modules[module_name] + except KeyError: + # Fix for manage.py shell_plus (Issue #366) + module = None + + if module is not None: + module_name = module.__name__ + # - If the task module is used as the __main__ script + # - we need to rewrite the module part of the task name + # - to match App.main. + if MP_MAIN_FILE and module.__file__ == MP_MAIN_FILE: + # - see comment about :envvar:`MP_MAIN_FILE` above. 
+ module_name = '__main__' + if module_name == '__main__' and app.main: + return '.'.join([app.main, name]) + return '.'.join(p for p in (module_name, name) if p) + + +def nodename(name, hostname): + """Create node name from name/hostname pair.""" + return NODENAME_SEP.join((name, hostname)) + + +def anon_nodename(hostname=None, prefix='gen'): + return nodename(''.join([prefix, str(os.getpid())]), + hostname or socket.gethostname()) + + +def nodesplit(nodename): + """Split node name into tuple of name/hostname.""" + parts = nodename.split(NODENAME_SEP, 1) + if len(parts) == 1: + return None, parts[0] + return parts + + +def default_nodename(hostname): + name, host = nodesplit(hostname or '') + return nodename(name or NODENAME_DEFAULT, host or socket.gethostname()) + + +def node_format(s, nodename, **extra): + name, host = nodesplit(nodename) + return host_format( + s, host, n=name or NODENAME_DEFAULT, **extra) + + +def _fmt_process_index(prefix='', default='0'): + from .log import current_process_index + index = current_process_index() + return '{0}{1}'.format(prefix, index) if index else default +_fmt_process_index_with_prefix = partial(_fmt_process_index, '-', '') + + +def host_format(s, host=None, **extra): + host = host or socket.gethostname() + name, _, domain = host.partition('.') + keys = dict({ + 'h': host, 'n': name, 'd': domain, + 'i': _fmt_process_index, 'I': _fmt_process_index_with_prefix, + }, **extra) + return simple_format(s, keys) + + +def simple_format(s, keys, pattern=RE_FORMAT, expand=r'\1'): + if s: + keys.setdefault('%', '%') + + def resolve(match): + resolver = keys[match.expand(expand)] + if isinstance(resolver, Callable): + return resolver() + return resolver + + return pattern.sub(resolve, s) + return s + + +# ------------------------------------------------------------------------ # +# > XXX Compat +from .log import LOG_LEVELS # noqa +from .imports import ( # noqa + qualname as get_full_cls_name, symbol_by_name as get_cls_by_name, + instantiate, import_from_cwd +) +from .functional import chunks, noop # noqa +from kombu.utils import cached_property, kwdict, uuid # noqa +gen_unique_id = uuid diff --git a/celery/utils/compat.py b/celery/utils/compat.py new file mode 100644 index 0000000..6f62964 --- /dev/null +++ b/celery/utils/compat.py @@ -0,0 +1 @@ +from celery.five import * # noqa diff --git a/celery/utils/debug.py b/celery/utils/debug.py new file mode 100644 index 0000000..79ac4e1 --- /dev/null +++ b/celery/utils/debug.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.debug + ~~~~~~~~~~~~~~~~~~ + + Utilities for debugging memory usage. 
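    Example (an illustrative sketch; ``handle()`` is a hypothetical work
    function, the memory helpers need the optional :mod:`psutil` package for
    meaningful output, and :func:`blockdetection` relies on ``SIGALRM`` so it
    is platform dependent)::

        from celery.utils.debug import blockdetection, memdump, sample_mem

        def process(items):
            for item in items:
                sample_mem()                       # record current RSS usage
                with blockdetection(timeout=10):   # raise if this blocks > 10s
                    handle(item)
            memdump()   # print collected samples and RSS after gc.collect()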
+ +""" +from __future__ import absolute_import, print_function, unicode_literals + +import os + +from contextlib import contextmanager +from functools import partial + +from celery.five import range +from celery.platforms import signals + +try: + from psutil import Process +except ImportError: + Process = None # noqa + +__all__ = [ + 'blockdetection', 'sample_mem', 'memdump', 'sample', + 'humanbytes', 'mem_rss', 'ps', +] + +UNITS = ( + (2 ** 40.0, 'TB'), + (2 ** 30.0, 'GB'), + (2 ** 20.0, 'MB'), + (2 ** 10.0, 'kB'), + (0.0, '{0!d}b'), +) + +_process = None +_mem_sample = [] + + +def _on_blocking(signum, frame): + import inspect + raise RuntimeError( + 'Blocking detection timed-out at: {0}'.format( + inspect.getframeinfo(frame) + ) + ) + + +@contextmanager +def blockdetection(timeout): + """A timeout context using ``SIGALRM`` that can be used to detect blocking + functions.""" + if not timeout: + yield + else: + old_handler = signals['ALRM'] + old_handler = None if old_handler == _on_blocking else old_handler + + signals['ALRM'] = _on_blocking + + try: + yield signals.arm_alarm(timeout) + finally: + if old_handler: + signals['ALRM'] = old_handler + signals.reset_alarm() + + +def sample_mem(): + """Sample RSS memory usage. + + Statistics can then be output by calling :func:`memdump`. + + """ + current_rss = mem_rss() + _mem_sample.append(current_rss) + return current_rss + + +def _memdump(samples=10): + S = _mem_sample + prev = list(S) if len(S) <= samples else sample(S, samples) + _mem_sample[:] = [] + import gc + gc.collect() + after_collect = mem_rss() + return prev, after_collect + + +def memdump(samples=10, file=None): + """Dump memory statistics. + + Will print a sample of all RSS memory samples added by + calling :func:`sample_mem`, and in addition print + used RSS memory after :func:`gc.collect`. + + """ + say = partial(print, file=file) + if ps() is None: + say('- rss: (psutil not installed).') + return + prev, after_collect = _memdump(samples) + if prev: + say('- rss (sample):') + for mem in prev: + say('- > {0},'.format(mem)) + say('- rss (end): {0}.'.format(after_collect)) + + +def sample(x, n, k=0): + """Given a list `x` a sample of length ``n`` of that list is returned. + + E.g. if `n` is 10, and `x` has 100 items, a list of every 10th + item is returned. + + ``k`` can be used as offset. + + """ + j = len(x) // n + for _ in range(n): + try: + yield x[k] + except IndexError: + break + k += j + + +def hfloat(f, p=5): + """Convert float to value suitable for humans. + + :keyword p: Float precision. + + """ + i = int(f) + return i if i == f else '{0:.{p}}'.format(f, p=p) + + +def humanbytes(s): + """Convert bytes to human-readable form (e.g. 
kB, MB).""" + return next( + '{0}{1}'.format(hfloat(s / div if div else s), unit) + for div, unit in UNITS if s >= div + ) + + +def mem_rss(): + """Return RSS memory usage as a humanized string.""" + p = ps() + if p is not None: + return humanbytes(p.get_memory_info().rss) + + +def ps(): + """Return the global :class:`psutil.Process` instance, + or :const:`None` if :mod:`psutil` is not installed.""" + global _process + if _process is None and Process is not None: + _process = Process(os.getpid()) + return _process diff --git a/celery/utils/dispatch/__init__.py b/celery/utils/dispatch/__init__.py new file mode 100644 index 0000000..b6e8d0b --- /dev/null +++ b/celery/utils/dispatch/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .signal import Signal + +__all__ = ['Signal'] diff --git a/celery/utils/dispatch/saferef.py b/celery/utils/dispatch/saferef.py new file mode 100644 index 0000000..cd818bb --- /dev/null +++ b/celery/utils/dispatch/saferef.py @@ -0,0 +1,286 @@ +# -*- coding: utf-8 -*- +""" +"Safe weakrefs", originally from pyDispatcher. + +Provides a way to safely weakref any function, including bound methods (which +aren't handled by the core weakref module). +""" +from __future__ import absolute_import + +import sys +import traceback +import weakref + +__all__ = ['safe_ref'] + +PY3 = sys.version_info[0] == 3 + + +def safe_ref(target, on_delete=None): # pragma: no cover + """Return a *safe* weak reference to a callable target + + :param target: the object to be weakly referenced, if it's a + bound method reference, will create a :class:`BoundMethodWeakref`, + otherwise creates a simple :class:`weakref.ref`. + + :keyword on_delete: if provided, will have a hard reference stored + to the callable to be called after the safe reference + goes out of scope with the reference object, (either a + :class:`weakref.ref` or a :class:`BoundMethodWeakref`) as argument. + """ + if getattr(target, '__self__', None) is not None: + # Turn a bound method into a BoundMethodWeakref instance. + # Keep track of these instances for lookup by disconnect(). + assert hasattr(target, '__func__'), \ + """safe_ref target {0!r} has __self__, but no __func__: \ + don't know how to create reference""".format(target) + return get_bound_method_weakref(target=target, + on_delete=on_delete) + if callable(on_delete): + return weakref.ref(target, on_delete) + else: + return weakref.ref(target) + + +class BoundMethodWeakref(object): # pragma: no cover + """'Safe' and reusable weak references to instance methods. + + BoundMethodWeakref objects provide a mechanism for + referencing a bound method without requiring that the + method object itself (which is normally a transient + object) is kept alive. Instead, the BoundMethodWeakref + object keeps weak references to both the object and the + function which together define the instance method. + + .. attribute:: key + + the identity key for the reference, calculated + by the class's :meth:`calculate_key` method applied to the + target instance method + + .. attribute:: deletion_methods + + sequence of callable objects taking + single argument, a reference to this object which + will be called when *either* the target object or + target function is garbage collected (i.e. when + this object becomes invalid). These are specified + as the on_delete parameters of :func:`safe_ref` calls. + + .. attribute:: weak_self + + weak reference to the target object + + .. 
attribute:: weak_fun + + weak reference to the target function + + .. attribute:: _all_instances + + class attribute pointing to all live + BoundMethodWeakref objects indexed by the class's + `calculate_key(target)` method applied to the target + objects. This weak value dictionary is used to + short-circuit creation so that multiple references + to the same (object, function) pair produce the + same BoundMethodWeakref instance. + + """ + + _all_instances = weakref.WeakValueDictionary() + + def __new__(cls, target, on_delete=None, *arguments, **named): + """Create new instance or return current instance + + Basically this method of construction allows us to + short-circuit creation of references to already- + referenced instance methods. The key corresponding + to the target is calculated, and if there is already + an existing reference, that is returned, with its + deletionMethods attribute updated. Otherwise the + new instance is created and registered in the table + of already-referenced methods. + + """ + key = cls.calculate_key(target) + current = cls._all_instances.get(key) + if current is not None: + current.deletion_methods.append(on_delete) + return current + else: + base = super(BoundMethodWeakref, cls).__new__(cls) + cls._all_instances[key] = base + base.__init__(target, on_delete, *arguments, **named) + return base + + def __init__(self, target, on_delete=None): + """Return a weak-reference-like instance for a bound method + + :param target: the instance-method target for the weak + reference, must have `__self__` and `__func__` attributes + and be reconstructable via:: + + target.__func__.__get__(target.__self__) + + which is true of built-in instance methods. + + :keyword on_delete: optional callback which will be called + when this weak reference ceases to be valid + (i.e. either the object or the function is garbage + collected). Should take a single argument, + which will be passed a pointer to this object. + + """ + def remove(weak, self=self): + """Set self.is_dead to true when method or instance is destroyed""" + methods = self.deletion_methods[:] + del(self.deletion_methods[:]) + try: + del(self.__class__._all_instances[self.key]) + except KeyError: + pass + for function in methods: + try: + if callable(function): + function(self) + except Exception as exc: + try: + traceback.print_exc() + except AttributeError: + print('Exception during saferef {0} cleanup function ' + '{1}: {2}'.format(self, function, exc)) + + self.deletion_methods = [on_delete] + self.key = self.calculate_key(target) + self.weak_self = weakref.ref(target.__self__, remove) + self.weak_fun = weakref.ref(target.__func__, remove) + self.self_name = str(target.__self__) + self.fun_name = str(target.__func__.__name__) + + def calculate_key(cls, target): + """Calculate the reference key for this reference + + Currently this is a two-tuple of the `id()`'s of the + target object and the target function respectively. 
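    Example (an illustrative sketch of the short-circuiting described for
    :meth:`__new__` above)::

        >>> from celery.utils.dispatch.saferef import safe_ref

        >>> class Worker(object):
        ...     def run(self):
        ...         return 'ok'

        >>> w = Worker()
        >>> ref1 = safe_ref(w.run)
        >>> ref2 = safe_ref(w.run)
        >>> ref1 is ref2    # same (object, function) pair, same reference object
        True
        >>> ref1()()        # dereference to a bound method and call it
        'ok'
        >>> del(w)          # once the instance dies the reference goes dead
        >>> ref1() is None
        True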
+ """ + return id(target.__self__), id(target.__func__) + calculate_key = classmethod(calculate_key) + + def __str__(self): + """Give a friendly representation of the object""" + return '{0}( {1}.{2} )'.format( + type(self).__name__, + self.self_name, + self.fun_name, + ) + + __repr__ = __str__ + + def __bool__(self): + """Whether we are still a valid reference""" + return self() is not None + __nonzero__ = __bool__ # py2 + + if not PY3: + def __cmp__(self, other): + """Compare with another reference""" + if not isinstance(other, self.__class__): + return cmp(self.__class__, type(other)) # noqa + return cmp(self.key, other.key) # noqa + + def __call__(self): + """Return a strong reference to the bound method + + If the target cannot be retrieved, then will + return None, otherwise return a bound instance + method for our object and function. + + Note: + You may call this method any number of times, + as it does not invalidate the reference. + """ + target = self.weak_self() + if target is not None: + function = self.weak_fun() + if function is not None: + return function.__get__(target) + + +class BoundNonDescriptorMethodWeakref(BoundMethodWeakref): # pragma: no cover + """A specialized :class:`BoundMethodWeakref`, for platforms where + instance methods are not descriptors. + + It assumes that the function name and the target attribute name are the + same, instead of assuming that the function is a descriptor. This approach + is equally fast, but not 100% reliable because functions can be stored on + an attribute named differenty than the function's name such as in:: + + >>> class A(object): + ... pass + + >>> def foo(self): + ... return 'foo' + >>> A.bar = foo + + But this shouldn't be a common use case. So, on platforms where methods + aren't descriptors (such as Jython) this implementation has the advantage + of working in the most cases. + + """ + def __init__(self, target, on_delete=None): + """Return a weak-reference-like instance for a bound method + + :param target: the instance-method target for the weak + reference, must have `__self__` and `__func__` attributes + and be reconstructable via:: + + target.__func__.__get__(target.__self__) + + which is true of built-in instance methods. + + :keyword on_delete: optional callback which will be called + when this weak reference ceases to be valid + (i.e. either the object or the function is garbage + collected). Should take a single argument, + which will be passed a pointer to this object. + + """ + assert getattr(target.__self__, target.__name__) == target + super(BoundNonDescriptorMethodWeakref, self).__init__(target, + on_delete) + + def __call__(self): + """Return a strong reference to the bound method + + If the target cannot be retrieved, then will + return None, otherwise return a bound instance + method for our object and function. + + Note: + You may call this method any number of times, + as it does not invalidate the reference. + + """ + target = self.weak_self() + if target is not None: + function = self.weak_fun() + if function is not None: + # Using curry() would be another option, but it erases the + # "signature" of the function. That is, after a function is + # curried, the inspect module can't be used to determine how + # many arguments the function expects, nor what keyword + # arguments it supports, and pydispatcher needs this + # information. 
+ return getattr(target, function.__name__) + + +def get_bound_method_weakref(target, on_delete): # pragma: no cover + """Instantiates the appropiate :class:`BoundMethodWeakRef`, depending + on the details of the underlying class method implementation.""" + if hasattr(target, '__get__'): + # target method is a descriptor, so the default implementation works: + return BoundMethodWeakref(target=target, on_delete=on_delete) + else: + # no luck, use the alternative implementation: + return BoundNonDescriptorMethodWeakref(target=target, + on_delete=on_delete) diff --git a/celery/utils/dispatch/signal.py b/celery/utils/dispatch/signal.py new file mode 100644 index 0000000..7d4b337 --- /dev/null +++ b/celery/utils/dispatch/signal.py @@ -0,0 +1,241 @@ +# -*- coding: utf-8 -*- +"""Signal class.""" +from __future__ import absolute_import + +import weakref +from . import saferef + +from celery.five import range +from celery.local import PromiseProxy, Proxy + +__all__ = ['Signal'] + +WEAKREF_TYPES = (weakref.ReferenceType, saferef.BoundMethodWeakref) + + +def _make_id(target): # pragma: no cover + if isinstance(target, Proxy): + target = target._get_current_object() + if hasattr(target, '__func__'): + return (id(target.__self__), id(target.__func__)) + return id(target) + + +class Signal(object): # pragma: no cover + """Base class for all signals + + + .. attribute:: receivers + Internal attribute, holds a dictionary of + `{receiverkey (id): weakref(receiver)}` mappings. + + """ + + def __init__(self, providing_args=None): + """Create a new signal. + + :param providing_args: A list of the arguments this signal can pass + along in a :meth:`send` call. + + """ + self.receivers = [] + if providing_args is None: + providing_args = [] + self.providing_args = set(providing_args) + + def _connect_proxy(self, fun, sender, weak, dispatch_uid): + return self.connect( + fun, sender=sender._get_current_object(), + weak=weak, dispatch_uid=dispatch_uid, + ) + + def connect(self, *args, **kwargs): + """Connect receiver to sender for signal. + + :param receiver: A function or an instance method which is to + receive signals. Receivers must be hashable objects. + + if weak is :const:`True`, then receiver must be weak-referencable + (more precisely :func:`saferef.safe_ref()` must be able to create a + reference to the receiver). + + Receivers must be able to accept keyword arguments. + + If receivers have a `dispatch_uid` attribute, the receiver will + not be added if another receiver already exists with that + `dispatch_uid`. + + :keyword sender: The sender to which the receiver should respond. + Must either be of type :class:`Signal`, or :const:`None` to receive + events from any sender. + + :keyword weak: Whether to use weak references to the receiver. + By default, the module will attempt to use weak references to the + receiver objects. If this parameter is false, then strong + references will be used. + + :keyword dispatch_uid: An identifier used to uniquely identify a + particular instance of a receiver. This will usually be a + string, though it may be anything hashable. 
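        A minimal usage sketch (``task_started`` and ``handler`` are
        hypothetical names)::

            >>> task_started = Signal(providing_args=['task_id'])

            >>> def handler(sender=None, task_id=None, **kwargs):
            ...     print('task {0} started'.format(task_id))

            >>> _ = task_started.connect(handler, dispatch_uid='log-start')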
+ + """ + def _handle_options(sender=None, weak=True, dispatch_uid=None): + + def _connect_signal(fun): + receiver = fun + + if isinstance(sender, PromiseProxy): + sender.__then__( + self._connect_proxy, fun, sender, weak, dispatch_uid, + ) + return fun + + if dispatch_uid: + lookup_key = (dispatch_uid, _make_id(sender)) + else: + lookup_key = (_make_id(receiver), _make_id(sender)) + + if weak: + receiver = saferef.safe_ref( + receiver, on_delete=self._remove_receiver, + ) + + for r_key, _ in self.receivers: + if r_key == lookup_key: + break + else: + self.receivers.append((lookup_key, receiver)) + + return fun + + return _connect_signal + + if args and callable(args[0]): + return _handle_options(*args[1:], **kwargs)(args[0]) + return _handle_options(*args, **kwargs) + + def disconnect(self, receiver=None, sender=None, weak=True, + dispatch_uid=None): + """Disconnect receiver from sender for signal. + + If weak references are used, disconnect need not be called. The + receiver will be removed from dispatch automatically. + + :keyword receiver: The registered receiver to disconnect. May be + none if `dispatch_uid` is specified. + + :keyword sender: The registered sender to disconnect. + + :keyword weak: The weakref state to disconnect. + + :keyword dispatch_uid: the unique identifier of the receiver + to disconnect + + """ + if dispatch_uid: + lookup_key = (dispatch_uid, _make_id(sender)) + else: + lookup_key = (_make_id(receiver), _make_id(sender)) + + for index in range(len(self.receivers)): + (r_key, _) = self.receivers[index] + if r_key == lookup_key: + del self.receivers[index] + break + + def send(self, sender, **named): + """Send signal from sender to all connected receivers. + + If any receiver raises an error, the error propagates back through + send, terminating the dispatch loop, so it is quite possible to not + have all receivers called if a raises an error. + + :param sender: The sender of the signal. Either a specific + object or :const:`None`. + + :keyword \*\*named: Named arguments which will be passed to receivers. + + :returns: a list of tuple pairs: `[(receiver, response), … ]`. + + """ + responses = [] + if not self.receivers: + return responses + + for receiver in self._live_receivers(_make_id(sender)): + response = receiver(signal=self, sender=sender, **named) + responses.append((receiver, response)) + return responses + + def send_robust(self, sender, **named): + """Send signal from sender to all connected receivers catching errors. + + :param sender: The sender of the signal. Can be any python object + (normally one registered with a connect if you actually want + something to occur). + + :keyword \*\*named: Named arguments which will be passed to receivers. + These arguments must be a subset of the argument names defined in + :attr:`providing_args`. + + :returns: a list of tuple pairs: `[(receiver, response), … ]`. + + :raises DispatcherKeyError: + + if any receiver raises an error (specifically any subclass of + :exc:`Exception`), the error instance is returned as the result + for that receiver. + + """ + responses = [] + if not self.receivers: + return responses + + # Call each receiver with whatever arguments it can accept. + # Return a list of tuple pairs [(receiver, response), … ]. 
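        # A minimal sketch of the behaviour described above, assuming a
        # hypothetical receiver that always raises:
        #
        #     >>> sig = Signal(providing_args=['x'])
        #     >>> def failing(sender=None, x=None, **kwargs):
        #     ...     raise ValueError(x)
        #     >>> _ = sig.connect(failing, weak=False)
        #     >>> receiver, err = sig.send_robust(None, x=1)[0]
        #     >>> isinstance(err, ValueError)
        #     True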
+ for receiver in self._live_receivers(_make_id(sender)): + try: + response = receiver(signal=self, sender=sender, **named) + except Exception as err: + responses.append((receiver, err)) + else: + responses.append((receiver, response)) + return responses + + def _live_receivers(self, senderkey): + """Filter sequence of receivers to get resolved, live receivers. + + This checks for weak references and resolves them, then returning only + live receivers. + + """ + none_senderkey = _make_id(None) + receivers = [] + + for (receiverkey, r_senderkey), receiver in self.receivers: + if r_senderkey == none_senderkey or r_senderkey == senderkey: + if isinstance(receiver, WEAKREF_TYPES): + # Dereference the weak reference. + receiver = receiver() + if receiver is not None: + receivers.append(receiver) + else: + receivers.append(receiver) + return receivers + + def _remove_receiver(self, receiver): + """Remove dead receivers from connections.""" + + to_remove = [] + for key, connected_receiver in self.receivers: + if connected_receiver == receiver: + to_remove.append(key) + for key in to_remove: + for idx, (r_key, _) in enumerate(self.receivers): + if r_key == key: + del self.receivers[idx] + + def __repr__(self): + return ''.format(type(self).__name__) + + __str__ = __repr__ diff --git a/celery/utils/encoding.py b/celery/utils/encoding.py new file mode 100644 index 0000000..3ddcd35 --- /dev/null +++ b/celery/utils/encoding.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.encoding + ~~~~~~~~~~~~~~~~~~~~~ + + This module has moved to :mod:`kombu.utils.encoding`. + +""" +from __future__ import absolute_import + +from kombu.utils.encoding import ( # noqa + default_encode, default_encoding, bytes_t, bytes_to_str, str_t, + str_to_bytes, ensure_bytes, from_utf8, safe_str, safe_repr, +) diff --git a/celery/utils/functional.py b/celery/utils/functional.py new file mode 100644 index 0000000..faa272b --- /dev/null +++ b/celery/utils/functional.py @@ -0,0 +1,306 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.functional + ~~~~~~~~~~~~~~~~~~~~~~~ + + Utilities for functions. + +""" +from __future__ import absolute_import + +import sys +import threading + +from functools import wraps +from itertools import islice + +from kombu.utils import cached_property +from kombu.utils.functional import lazy, maybe_evaluate, is_list, maybe_list +from kombu.utils.compat import OrderedDict + +from celery.five import UserDict, UserList, items, keys + +__all__ = ['LRUCache', 'is_list', 'maybe_list', 'memoize', 'mlazy', 'noop', + 'first', 'firstmethod', 'chunks', 'padlist', 'mattrgetter', 'uniq', + 'regen', 'dictfilter', 'lazy', 'maybe_evaluate'] + +KEYWORD_MARK = object() + + +class LRUCache(UserDict): + """LRU Cache implementation using a doubly linked list to track access. + + :keyword limit: The maximum number of keys to keep in the cache. + When a new key is inserted and the limit has been exceeded, + the *Least Recently Used* key will be discarded from the + cache. 
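    For example (a minimal sketch)::

        >>> cache = LRUCache(limit=2)
        >>> cache['foo'] = 1
        >>> cache['bar'] = 2
        >>> cache['baz'] = 3      # 'foo' is evicted as least recently used
        >>> 'foo' in cache
        False
        >>> sorted(cache.keys())
        ['bar', 'baz']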
+ + """ + + def __init__(self, limit=None): + self.limit = limit + self.mutex = threading.RLock() + self.data = OrderedDict() + + def __getitem__(self, key): + with self.mutex: + value = self[key] = self.data.pop(key) + return value + + def update(self, *args, **kwargs): + with self.mutex: + data, limit = self.data, self.limit + data.update(*args, **kwargs) + if limit and len(data) > limit: + # pop additional items in case limit exceeded + # negative overflow will lead to an empty list + for item in islice(iter(data), len(data) - limit): + data.pop(item) + + def __setitem__(self, key, value): + # remove least recently used key. + with self.mutex: + if self.limit and len(self.data) >= self.limit: + self.data.pop(next(iter(self.data))) + self.data[key] = value + + def __iter__(self): + return iter(self.data) + + def _iterate_items(self): + for k in self: + try: + yield (k, self.data[k]) + except KeyError: # pragma: no cover + pass + iteritems = _iterate_items + + def _iterate_values(self): + for k in self: + try: + yield self.data[k] + except KeyError: # pragma: no cover + pass + itervalues = _iterate_values + + def _iterate_keys(self): + # userdict.keys in py3k calls __getitem__ + return keys(self.data) + iterkeys = _iterate_keys + + def incr(self, key, delta=1): + with self.mutex: + # this acts as memcached does- store as a string, but return a + # integer as long as it exists and we can cast it + newval = int(self.data.pop(key)) + delta + self[key] = str(newval) + return newval + + def __getstate__(self): + d = dict(vars(self)) + d.pop('mutex') + return d + + def __setstate__(self, state): + self.__dict__ = state + self.mutex = threading.RLock() + + if sys.version_info[0] == 3: # pragma: no cover + keys = _iterate_keys + values = _iterate_values + items = _iterate_items + else: # noqa + + def keys(self): + return list(self._iterate_keys()) + + def values(self): + return list(self._iterate_values()) + + def items(self): + return list(self._iterate_items()) + + +def memoize(maxsize=None, keyfun=None, Cache=LRUCache): + + def _memoize(fun): + mutex = threading.Lock() + cache = Cache(limit=maxsize) + + @wraps(fun) + def _M(*args, **kwargs): + if keyfun: + key = keyfun(args, kwargs) + else: + key = args + (KEYWORD_MARK, ) + tuple(sorted(kwargs.items())) + try: + with mutex: + value = cache[key] + except KeyError: + value = fun(*args, **kwargs) + _M.misses += 1 + with mutex: + cache[key] = value + else: + _M.hits += 1 + return value + + def clear(): + """Clear the cache and reset cache statistics.""" + cache.clear() + _M.hits = _M.misses = 0 + + _M.hits = _M.misses = 0 + _M.clear = clear + _M.original_func = fun + return _M + + return _memoize + + +class mlazy(lazy): + """Memoized lazy evaluation. + + The function is only evaluated once, every subsequent access + will return the same value. + + .. attribute:: evaluated + + Set to to :const:`True` after the object has been evaluated. + + """ + evaluated = False + _value = None + + def evaluate(self): + if not self.evaluated: + self._value = super(mlazy, self).evaluate() + self.evaluated = True + return self._value + + +def noop(*args, **kwargs): + """No operation. + + Takes any arguments/keyword arguments and does nothing. + + """ + pass + + +def first(predicate, it): + """Return the first element in `iterable` that `predicate` Gives a + :const:`True` value for. + + If `predicate` is None it will return the first item that is not None. 
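    For example (a minimal sketch)::

        >>> first(lambda n: n > 2, [1, 2, 3, 4])
        3
        >>> first(None, [None, None, 'x', 'y'])
        'x'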
+ + """ + return next( + (v for v in it if (predicate(v) if predicate else v is not None)), + None, + ) + + +def firstmethod(method): + """Return a function that with a list of instances, + finds the first instance that gives a value for the given method. + + The list can also contain lazy instances + (:class:`~kombu.utils.functional.lazy`.) + + """ + + def _matcher(it, *args, **kwargs): + for obj in it: + try: + answer = getattr(maybe_evaluate(obj), method)(*args, **kwargs) + except AttributeError: + pass + else: + if answer is not None: + return answer + + return _matcher + + +def chunks(it, n): + """Split an iterator into chunks with `n` elements each. + + Examples + + # n == 2 + >>> x = chunks(iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 2) + >>> list(x) + [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10]] + + # n == 3 + >>> x = chunks(iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 3) + >>> list(x) + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10]] + + """ + # XXX This function is not used anymore, at least not by Celery itself. + for first in it: + yield [first] + list(islice(it, n - 1)) + + +def padlist(container, size, default=None): + """Pad list with default elements. + + Examples: + + >>> first, last, city = padlist(['George', 'Costanza', 'NYC'], 3) + ('George', 'Costanza', 'NYC') + >>> first, last, city = padlist(['George', 'Costanza'], 3) + ('George', 'Costanza', None) + >>> first, last, city, planet = padlist( + ... ['George', 'Costanza', 'NYC'], 4, default='Earth', + ... ) + ('George', 'Costanza', 'NYC', 'Earth') + + """ + return list(container)[:size] + [default] * (size - len(container)) + + +def mattrgetter(*attrs): + """Like :func:`operator.itemgetter` but return :const:`None` on missing + attributes instead of raising :exc:`AttributeError`.""" + return lambda obj: dict((attr, getattr(obj, attr, None)) + for attr in attrs) + + +def uniq(it): + """Return all unique elements in ``it``, preserving order.""" + seen = set() + return (seen.add(obj) or obj for obj in it if obj not in seen) + + +def regen(it): + """Regen takes any iterable, and if the object is an + generator it will cache the evaluated list on first access, + so that the generator can be "consumed" multiple times.""" + if isinstance(it, (list, tuple)): + return it + return _regen(it) + + +class _regen(UserList, list): + # must be subclass of list so that json can encode. + def __init__(self, it): + self.__it = it + + def __reduce__(self): + return list, (self.data, ) + + def __length_hint__(self): + return self.__it.__length_hint__() + + @cached_property + def data(self): + return list(self.__it) + + +def dictfilter(d=None, **kw): + """Remove all keys from dict ``d`` whose value is :const:`None`""" + d = kw if d is None else (dict(d, **kw) if kw else d) + return dict((k, v) for k, v in items(d) if v is not None) diff --git a/celery/utils/imports.py b/celery/utils/imports.py new file mode 100644 index 0000000..22a2fdc --- /dev/null +++ b/celery/utils/imports.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.import + ~~~~~~~~~~~~~~~~~~~ + + Utilities related to importing modules and symbols by name. 
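    For example (a minimal sketch)::

        >>> from celery.utils.imports import instantiate, qualname
        >>> qualname(instantiate('collections.OrderedDict'))
        'collections.OrderedDict'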
+ +""" +from __future__ import absolute_import + +import imp as _imp +import importlib +import os +import sys + +from contextlib import contextmanager + +from kombu.utils import symbol_by_name + +from celery.five import reload + +__all__ = [ + 'NotAPackage', 'qualname', 'instantiate', 'symbol_by_name', 'cwd_in_path', + 'find_module', 'import_from_cwd', 'reload_from_cwd', 'module_file', +] + + +class NotAPackage(Exception): + pass + + +if sys.version_info > (3, 3): # pragma: no cover + def qualname(obj): + if not hasattr(obj, '__name__') and hasattr(obj, '__class__'): + obj = obj.__class__ + q = getattr(obj, '__qualname__', None) + if '.' not in q: + q = '.'.join((obj.__module__, q)) + return q +else: + def qualname(obj): # noqa + if not hasattr(obj, '__name__') and hasattr(obj, '__class__'): + obj = obj.__class__ + return '.'.join((obj.__module__, obj.__name__)) + + +def instantiate(name, *args, **kwargs): + """Instantiate class by name. + + See :func:`symbol_by_name`. + + """ + return symbol_by_name(name)(*args, **kwargs) + + +@contextmanager +def cwd_in_path(): + cwd = os.getcwd() + if cwd in sys.path: + yield + else: + sys.path.insert(0, cwd) + try: + yield cwd + finally: + try: + sys.path.remove(cwd) + except ValueError: # pragma: no cover + pass + + +def find_module(module, path=None, imp=None): + """Version of :func:`imp.find_module` supporting dots.""" + if imp is None: + imp = importlib.import_module + with cwd_in_path(): + if '.' in module: + last = None + parts = module.split('.') + for i, part in enumerate(parts[:-1]): + mpart = imp('.'.join(parts[:i + 1])) + try: + path = mpart.__path__ + except AttributeError: + raise NotAPackage(module) + last = _imp.find_module(parts[i + 1], path) + return last + return _imp.find_module(module) + + +def import_from_cwd(module, imp=None, package=None): + """Import module, but make sure it finds modules + located in the current directory. + + Modules located in the current directory has + precedence over modules located in `sys.path`. + """ + if imp is None: + imp = importlib.import_module + with cwd_in_path(): + return imp(module, package=package) + + +def reload_from_cwd(module, reloader=None): + if reloader is None: + reloader = reload + with cwd_in_path(): + return reloader(module) + + +def module_file(module): + """Return the correct original file name of a module.""" + name = module.__file__ + return name[:-1] if name.endswith('.pyc') else name diff --git a/celery/utils/iso8601.py b/celery/utils/iso8601.py new file mode 100644 index 0000000..c951cf6 --- /dev/null +++ b/celery/utils/iso8601.py @@ -0,0 +1,77 @@ +""" +Originally taken from pyiso8601 (http://code.google.com/p/pyiso8601/) + +Modified to match the behavior of dateutil.parser: + + - raise ValueError instead of ParseError + - return naive datetimes by default + - uses pytz.FixedOffset + +This is the original License: + +Copyright (c) 2007 Michael Twomey + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +""" +from __future__ import absolute_import + +import re + +from datetime import datetime +from pytz import FixedOffset + +__all__ = ['parse_iso8601'] + +# Adapted from http://delete.me.uk/2005/03/iso8601.html +ISO8601_REGEX = re.compile( + r'(?P[0-9]{4})(-(?P[0-9]{1,2})(-(?P[0-9]{1,2})' + r'((?P.)(?P[0-9]{2}):(?P[0-9]{2})' + '(:(?P[0-9]{2})(\.(?P[0-9]+))?)?' + r'(?PZ|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?' +) +TIMEZONE_REGEX = re.compile( + '(?P[+-])(?P[0-9]{2}).(?P[0-9]{2})' +) + + +def parse_iso8601(datestring): + """Parse and convert ISO 8601 string into a datetime object""" + m = ISO8601_REGEX.match(datestring) + if not m: + raise ValueError('unable to parse date string %r' % datestring) + groups = m.groupdict() + tz = groups['timezone'] + if tz == 'Z': + tz = FixedOffset(0) + elif tz: + m = TIMEZONE_REGEX.match(tz) + prefix, hours, minutes = m.groups() + hours, minutes = int(hours), int(minutes) + if prefix == '-': + hours = -hours + minutes = -minutes + tz = FixedOffset(minutes + hours * 60) + frac = groups['fraction'] or 0 + return datetime( + int(groups['year']), int(groups['month']), int(groups['day']), + int(groups['hour']), int(groups['minute']), int(groups['second']), + int(frac), tz + ) diff --git a/celery/utils/log.py b/celery/utils/log.py new file mode 100644 index 0000000..b9226e1 --- /dev/null +++ b/celery/utils/log.py @@ -0,0 +1,297 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.log + ~~~~~~~~~~~~~~~~ + + Logging utilities. + +""" +from __future__ import absolute_import, print_function + +import logging +import numbers +import os +import sys +import threading +import traceback + +from contextlib import contextmanager +from billiard import current_process, util as mputil +from kombu.five import values +from kombu.log import get_logger as _get_logger, LOG_LEVELS +from kombu.utils.encoding import safe_str + +from celery.five import string_t, text_t + +from .term import colored + +__all__ = ['ColorFormatter', 'LoggingProxy', 'base_logger', + 'set_in_sighandler', 'in_sighandler', 'get_logger', + 'get_task_logger', 'mlevel', 'ensure_process_aware_logger', + 'get_multiprocessing_logger', 'reset_multiprocessing_logger'] + +_process_aware = False +PY3 = sys.version_info[0] == 3 + +MP_LOG = os.environ.get('MP_LOG', False) + + +# Sets up our logging hierarchy. +# +# Every logger in the celery package inherits from the "celery" +# logger, and every task logger inherits from the "celery.task" +# logger. 
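# For example, a logger obtained with get_task_logger() is re-parented
# below the "celery.task" logger (a minimal sketch; the logger name is
# hypothetical):
#
#     >>> get_task_logger('proj.tasks').parent is task_logger
#     True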
+base_logger = logger = _get_logger('celery') +mp_logger = _get_logger('multiprocessing') + +_in_sighandler = False + + +def set_in_sighandler(value): + global _in_sighandler + _in_sighandler = value + + +def iter_open_logger_fds(): + seen = set() + loggers = (list(values(logging.Logger.manager.loggerDict)) + + [logging.getLogger(None)]) + for logger in loggers: + try: + for handler in logger.handlers: + try: + if handler not in seen: + yield handler.stream + seen.add(handler) + except AttributeError: + pass + except AttributeError: # PlaceHolder does not have handlers + pass + + +@contextmanager +def in_sighandler(): + set_in_sighandler(True) + try: + yield + finally: + set_in_sighandler(False) + + +def logger_isa(l, p): + this, seen = l, set() + while this: + if this == p: + return True + else: + if this in seen: + raise RuntimeError( + 'Logger {0!r} parents recursive'.format(l), + ) + seen.add(this) + this = this.parent + return False + + +def get_logger(name): + l = _get_logger(name) + if logging.root not in (l, l.parent) and l is not base_logger: + if not logger_isa(l, base_logger): + l.parent = base_logger + return l +task_logger = get_logger('celery.task') +worker_logger = get_logger('celery.worker') + + +def get_task_logger(name): + logger = get_logger(name) + if not logger_isa(logger, task_logger): + logger.parent = task_logger + return logger + + +def mlevel(level): + if level and not isinstance(level, numbers.Integral): + return LOG_LEVELS[level.upper()] + return level + + +class ColorFormatter(logging.Formatter): + #: Loglevel -> Color mapping. + COLORS = colored().names + colors = {'DEBUG': COLORS['blue'], 'WARNING': COLORS['yellow'], + 'ERROR': COLORS['red'], 'CRITICAL': COLORS['magenta']} + + def __init__(self, fmt=None, use_color=True): + logging.Formatter.__init__(self, fmt) + self.use_color = use_color + + def formatException(self, ei): + if ei and not isinstance(ei, tuple): + ei = sys.exc_info() + r = logging.Formatter.formatException(self, ei) + if isinstance(r, str) and not PY3: + return safe_str(r) + return r + + def format(self, record): + msg = logging.Formatter.format(self, record) + color = self.colors.get(record.levelname) + + # reset exception info later for other handlers... + einfo = sys.exc_info() if record.exc_info == 1 else record.exc_info + + if color and self.use_color: + try: + # safe_str will repr the color object + # and color will break on non-string objects + # so need to reorder calls based on type. + # Issue #427 + try: + if isinstance(msg, string_t): + return text_t(color(safe_str(msg))) + return safe_str(color(msg)) + except UnicodeDecodeError: + return safe_str(msg) # skip colors + except Exception as exc: + prev_msg, record.exc_info, record.msg = ( + record.msg, 1, ''.format( + type(msg), exc + ), + ) + try: + return logging.Formatter.format(self, record) + finally: + record.msg, record.exc_info = prev_msg, einfo + else: + return safe_str(msg) + + +class LoggingProxy(object): + """Forward file object to :class:`logging.Logger` instance. + + :param logger: The :class:`logging.Logger` instance to forward to. + :param loglevel: Loglevel to use when writing messages. 
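    For example (a minimal sketch; the logger name is arbitrary)::

        >>> import logging
        >>> proxy = LoggingProxy(logging.getLogger('celery.example'),
        ...                      loglevel=logging.WARNING)
        >>> proxy.write('forwarded to the logger at WARNING level')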
+ + """ + mode = 'w' + name = None + closed = False + loglevel = logging.ERROR + _thread = threading.local() + + def __init__(self, logger, loglevel=None): + self.logger = logger + self.loglevel = mlevel(loglevel or self.logger.level or self.loglevel) + self._safewrap_handlers() + + def _safewrap_handlers(self): + """Make the logger handlers dump internal errors to + `sys.__stderr__` instead of `sys.stderr` to circumvent + infinite loops.""" + + def wrap_handler(handler): # pragma: no cover + + class WithSafeHandleError(logging.Handler): + + def handleError(self, record): + exc_info = sys.exc_info() + try: + try: + traceback.print_exception(exc_info[0], + exc_info[1], + exc_info[2], + None, sys.__stderr__) + except IOError: + pass # see python issue 5971 + finally: + del(exc_info) + + handler.handleError = WithSafeHandleError().handleError + return [wrap_handler(h) for h in self.logger.handlers] + + def write(self, data): + """Write message to logging object.""" + if _in_sighandler: + return print(safe_str(data), file=sys.__stderr__) + if getattr(self._thread, 'recurse_protection', False): + # Logger is logging back to this file, so stop recursing. + return + data = data.strip() + if data and not self.closed: + self._thread.recurse_protection = True + try: + self.logger.log(self.loglevel, safe_str(data)) + finally: + self._thread.recurse_protection = False + + def writelines(self, sequence): + """`writelines(sequence_of_strings) -> None`. + + Write the strings to the file. + + The sequence can be any iterable object producing strings. + This is equivalent to calling :meth:`write` for each string. + + """ + for part in sequence: + self.write(part) + + def flush(self): + """This object is not buffered so any :meth:`flush` requests + are ignored.""" + pass + + def close(self): + """When the object is closed, no write requests are forwarded to + the logging object anymore.""" + self.closed = True + + def isatty(self): + """Always return :const:`False`. Just here for file support.""" + return False + + +def ensure_process_aware_logger(force=False): + """Make sure process name is recorded when loggers are used.""" + global _process_aware + if force or not _process_aware: + logging._acquireLock() + try: + _process_aware = True + Logger = logging.getLoggerClass() + if getattr(Logger, '_process_aware', False): # pragma: no cover + return + + class ProcessAwareLogger(Logger): + _signal_safe = True + _process_aware = True + + def makeRecord(self, *args, **kwds): + record = Logger.makeRecord(self, *args, **kwds) + record.processName = current_process()._name + return record + + def log(self, *args, **kwargs): + if _in_sighandler: + return + return Logger.log(self, *args, **kwargs) + logging.setLoggerClass(ProcessAwareLogger) + finally: + logging._releaseLock() + + +def get_multiprocessing_logger(): + return mputil.get_logger() if mputil else None + + +def reset_multiprocessing_logger(): + if mputil and hasattr(mputil, '_logger'): + mputil._logger = None + + +def current_process_index(base=1): + if current_process: + index = getattr(current_process(), 'index', None) + return index + base if index is not None else index +ensure_process_aware_logger() diff --git a/celery/utils/mail.py b/celery/utils/mail.py new file mode 100644 index 0000000..00c5f29 --- /dev/null +++ b/celery/utils/mail.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.mail + ~~~~~~~~~~~~~~~~~ + + How task error emails are formatted and sent. 
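    For example (a minimal sketch; addresses and host are hypothetical)::

        >>> message = Message(to=['admin@example.com'],
        ...                   sender='celery@example.com',
        ...                   subject='Task error',
        ...                   body='The task raised an exception.')
        >>> Mailer(host='localhost', port=25).send(message, fail_silently=True)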
+ +""" +from __future__ import absolute_import + +import smtplib +import socket +import traceback +import warnings + +from email.mime.text import MIMEText + +from .functional import maybe_list + +try: + from ssl import SSLError +except ImportError: # pragma: no cover + class SSLError(Exception): # noqa + """fallback used when ssl module not compiled.""" + +__all__ = ['SendmailWarning', 'Message', 'Mailer', 'ErrorMail'] + +_local_hostname = None + + +def get_local_hostname(): + global _local_hostname + if _local_hostname is None: + _local_hostname = socket.getfqdn() + return _local_hostname + + +class SendmailWarning(UserWarning): + """Problem happened while sending the email message.""" + + +class Message(object): + + def __init__(self, to=None, sender=None, subject=None, + body=None, charset='us-ascii'): + self.to = maybe_list(to) + self.sender = sender + self.subject = subject + self.body = body + self.charset = charset + + def __repr__(self): + return ''.format(self) + + def __str__(self): + msg = MIMEText(self.body, 'plain', self.charset) + msg['Subject'] = self.subject + msg['From'] = self.sender + msg['To'] = ', '.join(self.to) + return msg.as_string() + + +class Mailer(object): + + def __init__(self, host='localhost', port=0, user=None, password=None, + timeout=2, use_ssl=False, use_tls=False): + self.host = host + self.port = port + self.user = user + self.password = password + self.timeout = timeout + self.use_ssl = use_ssl + self.use_tls = use_tls + + def send(self, message, fail_silently=False, **kwargs): + try: + self._send(message, **kwargs) + except Exception as exc: + if not fail_silently: + raise + warnings.warn(SendmailWarning( + 'Mail could not be sent: {0!r} {1!r}\n{2!r}'.format( + exc, {'To': ', '.join(message.to), + 'Subject': message.subject}, + traceback.format_stack()))) + + def _send(self, message, **kwargs): + Client = smtplib.SMTP_SSL if self.use_ssl else smtplib.SMTP + client = Client(self.host, self.port, timeout=self.timeout, + local_hostname=get_local_hostname(), **kwargs) + + if self.use_tls: + client.ehlo() + client.starttls() + client.ehlo() + + if self.user and self.password: + client.login(self.user, self.password) + + client.sendmail(message.sender, message.to, str(message)) + try: + client.quit() + except SSLError: + client.close() + + +class ErrorMail(object): + """Defines how and when task error e-mails should be sent. + + :param task: The task instance that raised the error. + + :attr:`subject` and :attr:`body` are format strings which + are passed a context containing the following keys: + + * name + + Name of the task. + + * id + + UUID of the task. + + * exc + + String representation of the exception. + + * args + + Positional arguments. + + * kwargs + + Keyword arguments. + + * traceback + + String representation of the traceback. + + * hostname + + Worker nodename. + + """ + + # pep8.py borks on a inline signature separator and + # says "trailing whitespace" ;) + EMAIL_SIGNATURE_SEP = '-- ' + + #: Format string used to generate error email subjects. + subject = """\ + [{hostname}] Error: Task {name} ({id}): {exc!r} + """ + + #: Format string used to generate error email content. + body = """ +Task {{name}} with id {{id}} raised exception:\n{{exc!r}} + + +Task was called with args: {{args}} kwargs: {{kwargs}}. + +The contents of the full traceback was: + +{{traceback}} + +{EMAIL_SIGNATURE_SEP} +Just to let you know, +py-celery at {{hostname}}. 
+""".format(EMAIL_SIGNATURE_SEP=EMAIL_SIGNATURE_SEP) + + def __init__(self, task, **kwargs): + self.task = task + self.subject = kwargs.get('subject', self.subject) + self.body = kwargs.get('body', self.body) + + def should_send(self, context, exc): + """Return true or false depending on if a task error mail + should be sent for this type of error.""" + return True + + def format_subject(self, context): + return self.subject.strip().format(**context) + + def format_body(self, context): + return self.body.strip().format(**context) + + def send(self, context, exc, fail_silently=True): + if self.should_send(context, exc): + self.task.app.mail_admins(self.format_subject(context), + self.format_body(context), + fail_silently=fail_silently) diff --git a/celery/utils/objects.py b/celery/utils/objects.py new file mode 100644 index 0000000..1555f9c --- /dev/null +++ b/celery/utils/objects.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.objects + ~~~~~~~~~~~~~~~~~~~~ + + Object related utilities including introspection, etc. + +""" +from __future__ import absolute_import + +__all__ = ['mro_lookup'] + + +def mro_lookup(cls, attr, stop=(), monkey_patched=[]): + """Return the first node by MRO order that defines an attribute. + + :keyword stop: A list of types that if reached will stop the search. + :keyword monkey_patched: Use one of the stop classes if the attr's + module origin is not in this list, this to detect monkey patched + attributes. + + :returns None: if the attribute was not found. + + """ + for node in cls.mro(): + if node in stop: + try: + attr = node.__dict__[attr] + module_origin = attr.__module__ + except (AttributeError, KeyError): + pass + else: + if module_origin not in monkey_patched: + return node + return + if attr in node.__dict__: + return node + + +class FallbackContext(object): + """The built-in ``@contextmanager`` utility does not work well + when wrapping other contexts, as the traceback is wrong when + the wrapped context raises. + + This solves this problem and can be used instead of ``@contextmanager`` + in this example:: + + @contextmanager + def connection_or_default_connection(connection=None): + if connection: + # user already has a connection, should not close + # after use + yield connection + else: + # must have new connection, and also close the connection + # after the block returns + with create_new_connection() as connection: + yield connection + + This wrapper can be used instead for the above like this:: + + def connection_or_default_connection(connection=None): + return FallbackContext(connection, create_new_connection) + + """ + + def __init__(self, provided, fallback, *fb_args, **fb_kwargs): + self.provided = provided + self.fallback = fallback + self.fb_args = fb_args + self.fb_kwargs = fb_kwargs + self._context = None + + def __enter__(self): + if self.provided is not None: + return self.provided + context = self._context = self.fallback( + *self.fb_args, **self.fb_kwargs + ).__enter__() + return context + + def __exit__(self, *exc_info): + if self._context is not None: + return self._context.__exit__(*exc_info) diff --git a/celery/utils/serialization.py b/celery/utils/serialization.py new file mode 100644 index 0000000..d5509f1 --- /dev/null +++ b/celery/utils/serialization.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.serialization + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Utilities for safely pickling exceptions. 
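    For example (a minimal sketch)::

        >>> import pickle
        >>> from celery.utils.serialization import get_pickleable_exception
        >>> from celery.utils.serialization import UnpickleableExceptionWrapper

        >>> exc = ValueError(lambda x: x)   # args cannot be pickled
        >>> wrapped = get_pickleable_exception(exc)
        >>> isinstance(wrapped, UnpickleableExceptionWrapper)
        True
        >>> pickle.loads(pickle.dumps(wrapped)).exc_cls_name
        'ValueError'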
+ +""" +from __future__ import absolute_import + +from inspect import getmro +from itertools import takewhile + +try: + import cPickle as pickle +except ImportError: + import pickle # noqa + +from .encoding import safe_repr + +__all__ = ['UnpickleableExceptionWrapper', 'subclass_exception', + 'find_pickleable_exception', 'create_exception_cls', + 'get_pickleable_exception', 'get_pickleable_etype', + 'get_pickled_exception'] + +#: List of base classes we probably don't want to reduce to. +try: + unwanted_base_classes = (StandardError, Exception, BaseException, object) +except NameError: # pragma: no cover + unwanted_base_classes = (Exception, BaseException, object) # py3k + + +def subclass_exception(name, parent, module): # noqa + return type(name, (parent, ), {'__module__': module}) + + +def find_pickleable_exception(exc, loads=pickle.loads, + dumps=pickle.dumps): + """With an exception instance, iterate over its super classes (by mro) + and find the first super exception that is pickleable. It does + not go below :exc:`Exception` (i.e. it skips :exc:`Exception`, + :class:`BaseException` and :class:`object`). If that happens + you should use :exc:`UnpickleableException` instead. + + :param exc: An exception instance. + + Will return the nearest pickleable parent exception class + (except :exc:`Exception` and parents), or if the exception is + pickleable it will return :const:`None`. + + :rtype :exc:`Exception`: + + """ + exc_args = getattr(exc, 'args', []) + for supercls in itermro(exc.__class__, unwanted_base_classes): + try: + superexc = supercls(*exc_args) + loads(dumps(superexc)) + except: + pass + else: + return superexc +find_nearest_pickleable_exception = find_pickleable_exception # XXX compat + + +def itermro(cls, stop): + return takewhile(lambda sup: sup not in stop, getmro(cls)) + + +def create_exception_cls(name, module, parent=None): + """Dynamically create an exception class.""" + if not parent: + parent = Exception + return subclass_exception(name, parent, module) + + +class UnpickleableExceptionWrapper(Exception): + """Wraps unpickleable exceptions. + + :param exc_module: see :attr:`exc_module`. + :param exc_cls_name: see :attr:`exc_cls_name`. + :param exc_args: see :attr:`exc_args` + + **Example** + + .. code-block:: python + + >>> def pickle_it(raising_function): + ... try: + ... raising_function() + ... except Exception as e: + ... exc = UnpickleableExceptionWrapper( + ... e.__class__.__module__, + ... e.__class__.__name__, + ... e.args, + ... ) + ... pickle.dumps(exc) # Works fine. + + """ + + #: The module of the original exception. + exc_module = None + + #: The name of the original exception class. + exc_cls_name = None + + #: The arguments for the original exception. 
+ exc_args = None + + def __init__(self, exc_module, exc_cls_name, exc_args, text=None): + safe_exc_args = [] + for arg in exc_args: + try: + pickle.dumps(arg) + safe_exc_args.append(arg) + except Exception: + safe_exc_args.append(safe_repr(arg)) + self.exc_module = exc_module + self.exc_cls_name = exc_cls_name + self.exc_args = safe_exc_args + self.text = text + Exception.__init__(self, exc_module, exc_cls_name, safe_exc_args, text) + + def restore(self): + return create_exception_cls(self.exc_cls_name, + self.exc_module)(*self.exc_args) + + def __str__(self): + return self.text + + @classmethod + def from_exception(cls, exc): + return cls(exc.__class__.__module__, + exc.__class__.__name__, + getattr(exc, 'args', []), + safe_repr(exc)) + + +def get_pickleable_exception(exc): + """Make sure exception is pickleable.""" + try: + pickle.loads(pickle.dumps(exc)) + except Exception: + pass + else: + return exc + nearest = find_pickleable_exception(exc) + if nearest: + return nearest + return UnpickleableExceptionWrapper.from_exception(exc) + + +def get_pickleable_etype(cls, loads=pickle.loads, dumps=pickle.dumps): + try: + loads(dumps(cls)) + except: + return Exception + else: + return cls + + +def get_pickled_exception(exc): + """Get original exception from exception pickled using + :meth:`get_pickleable_exception`.""" + if isinstance(exc, UnpickleableExceptionWrapper): + return exc.restore() + return exc diff --git a/celery/utils/sysinfo.py b/celery/utils/sysinfo.py new file mode 100644 index 0000000..65073a6 --- /dev/null +++ b/celery/utils/sysinfo.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +import os + +from math import ceil + +from kombu.utils import cached_property + +__all__ = ['load_average', 'df'] + + +if hasattr(os, 'getloadavg'): + + def load_average(): + return tuple(ceil(l * 1e2) / 1e2 for l in os.getloadavg()) + +else: # pragma: no cover + # Windows doesn't have getloadavg + def load_average(): # noqa + return (0.0, 0.0, 0.0) + + +class df(object): + + def __init__(self, path): + self.path = path + + @property + def total_blocks(self): + return self.stat.f_blocks * self.stat.f_frsize / 1024 + + @property + def available(self): + return self.stat.f_bavail * self.stat.f_frsize / 1024 + + @property + def capacity(self): + avail = self.stat.f_bavail + used = self.stat.f_blocks - self.stat.f_bfree + return int(ceil(used * 100.0 / (used + avail) + 0.5)) + + @cached_property + def stat(self): + return os.statvfs(os.path.abspath(self.path)) diff --git a/celery/utils/term.py b/celery/utils/term.py new file mode 100644 index 0000000..f6f08d4 --- /dev/null +++ b/celery/utils/term.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.term + ~~~~~~~~~~~~~~~~~ + + Terminals and colors. + +""" +from __future__ import absolute_import, unicode_literals + +import platform + +from functools import reduce + +from kombu.utils.encoding import safe_str +from celery.five import string + +__all__ = ['colored'] + +BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8) +OP_SEQ = '\033[%dm' +RESET_SEQ = '\033[0m' +COLOR_SEQ = '\033[1;%dm' +fg = lambda s: COLOR_SEQ % s + +IS_WINDOWS = platform.system() == 'Windows' + + +class colored(object): + """Terminal colored text. + + Example:: + >>> c = colored(enabled=True) + >>> print(str(c.red('the quick '), c.blue('brown ', c.bold('fox ')), + ... c.magenta(c.underline('jumps over')), + ... c.yellow(' the lazy '), + ... 
c.green('dog '))) + + """ + + def __init__(self, *s, **kwargs): + self.s = s + self.enabled = not IS_WINDOWS and kwargs.get('enabled', True) + self.op = kwargs.get('op', '') + self.names = {'black': self.black, + 'red': self.red, + 'green': self.green, + 'yellow': self.yellow, + 'blue': self.blue, + 'magenta': self.magenta, + 'cyan': self.cyan, + 'white': self.white} + + def _add(self, a, b): + return string(a) + string(b) + + def _fold_no_color(self, a, b): + try: + A = a.no_color() + except AttributeError: + A = string(a) + try: + B = b.no_color() + except AttributeError: + B = string(b) + + return ''.join((string(A), string(B))) + + def no_color(self): + if self.s: + return string(reduce(self._fold_no_color, self.s)) + return '' + + def embed(self): + prefix = '' + if self.enabled: + prefix = self.op + return ''.join((string(prefix), string(reduce(self._add, self.s)))) + + def __unicode__(self): + suffix = '' + if self.enabled: + suffix = RESET_SEQ + return string(''.join((self.embed(), string(suffix)))) + + def __str__(self): + return safe_str(self.__unicode__()) + + def node(self, s, op): + return self.__class__(enabled=self.enabled, op=op, *s) + + def black(self, *s): + return self.node(s, fg(30 + BLACK)) + + def red(self, *s): + return self.node(s, fg(30 + RED)) + + def green(self, *s): + return self.node(s, fg(30 + GREEN)) + + def yellow(self, *s): + return self.node(s, fg(30 + YELLOW)) + + def blue(self, *s): + return self.node(s, fg(30 + BLUE)) + + def magenta(self, *s): + return self.node(s, fg(30 + MAGENTA)) + + def cyan(self, *s): + return self.node(s, fg(30 + CYAN)) + + def white(self, *s): + return self.node(s, fg(30 + WHITE)) + + def __repr__(self): + return repr(self.no_color()) + + def bold(self, *s): + return self.node(s, OP_SEQ % 1) + + def underline(self, *s): + return self.node(s, OP_SEQ % 4) + + def blink(self, *s): + return self.node(s, OP_SEQ % 5) + + def reverse(self, *s): + return self.node(s, OP_SEQ % 7) + + def bright(self, *s): + return self.node(s, OP_SEQ % 8) + + def ired(self, *s): + return self.node(s, fg(40 + RED)) + + def igreen(self, *s): + return self.node(s, fg(40 + GREEN)) + + def iyellow(self, *s): + return self.node(s, fg(40 + YELLOW)) + + def iblue(self, *s): + return self.node(s, fg(40 + BLUE)) + + def imagenta(self, *s): + return self.node(s, fg(40 + MAGENTA)) + + def icyan(self, *s): + return self.node(s, fg(40 + CYAN)) + + def iwhite(self, *s): + return self.node(s, fg(40 + WHITE)) + + def reset(self, *s): + return self.node(s or [''], RESET_SEQ) + + def __add__(self, other): + return string(self) + string(other) diff --git a/celery/utils/text.py b/celery/utils/text.py new file mode 100644 index 0000000..ffd2d72 --- /dev/null +++ b/celery/utils/text.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.text + ~~~~~~~~~~~~~~~~~ + + Text formatting utilities + +""" +from __future__ import absolute_import + +from textwrap import fill + +from pprint import pformat + +__all__ = ['dedent_initial', 'dedent', 'fill_paragraphs', 'join', + 'ensure_2lines', 'abbr', 'abbrtask', 'indent', 'truncate', + 'pluralize', 'pretty'] + + +def dedent_initial(s, n=4): + return s[n:] if s[:n] == ' ' * n else s + + +def dedent(s, n=4, sep='\n'): + return sep.join(dedent_initial(l) for l in s.splitlines()) + + +def fill_paragraphs(s, width, sep='\n'): + return sep.join(fill(p, width) for p in s.split(sep)) + + +def join(l, sep='\n'): + return sep.join(v for v in l if v) + + +def ensure_2lines(s, sep='\n'): + if len(s.splitlines()) <= 2: + return s + sep + 
return s + + +def abbr(S, max, ellipsis='...'): + if S is None: + return '???' + if len(S) > max: + return ellipsis and (S[:max - len(ellipsis)] + ellipsis) or S[:max] + return S + + +def abbrtask(S, max): + if S is None: + return '???' + if len(S) > max: + module, _, cls = S.rpartition('.') + module = abbr(module, max - len(cls) - 3, False) + return module + '[.]' + cls + return S + + +def indent(t, indent=0, sep='\n'): + """Indent text.""" + return sep.join(' ' * indent + p for p in t.split(sep)) + + +def truncate(text, maxlen=128, suffix='...'): + """Truncates text to a maximum number of characters.""" + if len(text) >= maxlen: + return text[:maxlen].rsplit(' ', 1)[0] + suffix + return text + + +def pluralize(n, text, suffix='s'): + if n > 1: + return text + suffix + return text + + +def pretty(value, width=80, nl_width=80, sep='\n', **kw): + if isinstance(value, dict): + return '{{{0} {1}'.format(sep, pformat(value, 4, nl_width)[1:]) + elif isinstance(value, tuple): + return '{0}{1}{2}'.format( + sep, ' ' * 4, pformat(value, width=nl_width, **kw), + ) + else: + return pformat(value, width=width, **kw) diff --git a/celery/utils/threads.py b/celery/utils/threads.py new file mode 100644 index 0000000..5d42373 --- /dev/null +++ b/celery/utils/threads.py @@ -0,0 +1,329 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.threads + ~~~~~~~~~~~~~~~~~~~~ + + Threading utilities. + +""" +from __future__ import absolute_import, print_function + +import os +import socket +import sys +import threading +import traceback + +from contextlib import contextmanager + +from celery.local import Proxy +from celery.five import THREAD_TIMEOUT_MAX, items + +__all__ = ['bgThread', 'Local', 'LocalStack', 'LocalManager', + 'get_ident', 'default_socket_timeout'] + +USE_FAST_LOCALS = os.environ.get('USE_FAST_LOCALS') +PY3 = sys.version_info[0] == 3 + + +@contextmanager +def default_socket_timeout(timeout): + prev = socket.getdefaulttimeout() + socket.setdefaulttimeout(timeout) + yield + socket.setdefaulttimeout(prev) + + +class bgThread(threading.Thread): + + def __init__(self, name=None, **kwargs): + super(bgThread, self).__init__() + self._is_shutdown = threading.Event() + self._is_stopped = threading.Event() + self.daemon = True + self.name = name or self.__class__.__name__ + + def body(self): + raise NotImplementedError('subclass responsibility') + + def on_crash(self, msg, *fmt, **kwargs): + print(msg.format(*fmt), file=sys.stderr) + exc_info = sys.exc_info() + try: + traceback.print_exception(exc_info[0], exc_info[1], exc_info[2], + None, sys.stderr) + finally: + del(exc_info) + + def run(self): + body = self.body + shutdown_set = self._is_shutdown.is_set + try: + while not shutdown_set(): + try: + body() + except Exception as exc: + try: + self.on_crash('{0!r} crashed: {1!r}', self.name, exc) + self._set_stopped() + finally: + os._exit(1) # exiting by normal means won't work + finally: + self._set_stopped() + + def _set_stopped(self): + try: + self._is_stopped.set() + except TypeError: # pragma: no cover + # we lost the race at interpreter shutdown, + # so gc collected built-in modules. 
+ pass + + def stop(self): + """Graceful shutdown.""" + self._is_shutdown.set() + self._is_stopped.wait() + if self.is_alive(): + self.join(THREAD_TIMEOUT_MAX) + +try: + from greenlet import getcurrent as get_ident +except ImportError: # pragma: no cover + try: + from _thread import get_ident # noqa + except ImportError: + try: + from thread import get_ident # noqa + except ImportError: # pragma: no cover + try: + from _dummy_thread import get_ident # noqa + except ImportError: + from dummy_thread import get_ident # noqa + + +def release_local(local): + """Releases the contents of the local for the current context. + This makes it possible to use locals without a manager. + + Example:: + + >>> loc = Local() + >>> loc.foo = 42 + >>> release_local(loc) + >>> hasattr(loc, 'foo') + False + + With this function one can release :class:`Local` objects as well + as :class:`StackLocal` objects. However it is not possible to + release data held by proxies that way, one always has to retain + a reference to the underlying local object in order to be able + to release it. + + .. versionadded:: 0.6.1 + """ + local.__release_local__() + + +class Local(object): + __slots__ = ('__storage__', '__ident_func__') + + def __init__(self): + object.__setattr__(self, '__storage__', {}) + object.__setattr__(self, '__ident_func__', get_ident) + + def __iter__(self): + return iter(items(self.__storage__)) + + def __call__(self, proxy): + """Create a proxy for a name.""" + return Proxy(self, proxy) + + def __release_local__(self): + self.__storage__.pop(self.__ident_func__(), None) + + def __getattr__(self, name): + try: + return self.__storage__[self.__ident_func__()][name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name, value): + ident = self.__ident_func__() + storage = self.__storage__ + try: + storage[ident][name] = value + except KeyError: + storage[ident] = {name: value} + + def __delattr__(self, name): + try: + del self.__storage__[self.__ident_func__()][name] + except KeyError: + raise AttributeError(name) + + +class _LocalStack(object): + """This class works similar to a :class:`Local` but keeps a stack + of objects instead. This is best explained with an example:: + + >>> ls = LocalStack() + >>> ls.push(42) + >>> ls.top + 42 + >>> ls.push(23) + >>> ls.top + 23 + >>> ls.pop() + 23 + >>> ls.top + 42 + + They can be force released by using a :class:`LocalManager` or with + the :func:`release_local` function but the correct way is to pop the + item from the stack after using. When the stack is empty it will + no longer be bound to the current context (and as such released). + + By calling the stack without arguments it will return a proxy that + resolves to the topmost item on the stack. 
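    For example (a minimal sketch)::

        >>> ls = LocalStack()
        >>> current_user = ls()        # proxy to the topmost item
        >>> _ = ls.push({'name': 'George'})
        >>> current_user['name']
        'George'
        >>> _ = ls.pop()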
+ + """ + + def __init__(self): + self._local = Local() + + def __release_local__(self): + self._local.__release_local__() + + def _get__ident_func__(self): + return self._local.__ident_func__ + + def _set__ident_func__(self, value): + object.__setattr__(self._local, '__ident_func__', value) + __ident_func__ = property(_get__ident_func__, _set__ident_func__) + del _get__ident_func__, _set__ident_func__ + + def __call__(self): + def _lookup(): + rv = self.top + if rv is None: + raise RuntimeError('object unbound') + return rv + return Proxy(_lookup) + + def push(self, obj): + """Pushes a new item to the stack""" + rv = getattr(self._local, 'stack', None) + if rv is None: + self._local.stack = rv = [] + rv.append(obj) + return rv + + def pop(self): + """Remove the topmost item from the stack, will return the + old value or `None` if the stack was already empty. + """ + stack = getattr(self._local, 'stack', None) + if stack is None: + return None + elif len(stack) == 1: + release_local(self._local) + return stack[-1] + else: + return stack.pop() + + def __len__(self): + stack = getattr(self._local, 'stack', None) + return len(stack) if stack else 0 + + @property + def stack(self): + """get_current_worker_task uses this to find + the original task that was executed by the worker.""" + stack = getattr(self._local, 'stack', None) + if stack is not None: + return stack + return [] + + @property + def top(self): + """The topmost item on the stack. If the stack is empty, + `None` is returned. + """ + try: + return self._local.stack[-1] + except (AttributeError, IndexError): + return None + + +class LocalManager(object): + """Local objects cannot manage themselves. For that you need a local + manager. You can pass a local manager multiple locals or add them + later by appending them to `manager.locals`. Everytime the manager + cleans up it, will clean up all the data left in the locals for this + context. + + The `ident_func` parameter can be added to override the default ident + function for the wrapped locals. + + """ + + def __init__(self, locals=None, ident_func=None): + if locals is None: + self.locals = [] + elif isinstance(locals, Local): + self.locals = [locals] + else: + self.locals = list(locals) + if ident_func is not None: + self.ident_func = ident_func + for local in self.locals: + object.__setattr__(local, '__ident_func__', ident_func) + else: + self.ident_func = get_ident + + def get_ident(self): + """Return the context identifier the local objects use internally + for this context. You cannot override this method to change the + behavior but use it to link other context local objects (such as + SQLAlchemy's scoped sessions) to the Werkzeug locals.""" + return self.ident_func() + + def cleanup(self): + """Manually clean up the data in the locals for this context. + + Call this at the end of the request or use `make_middleware()`. 
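        For example (a minimal sketch)::

            >>> loc = Local()
            >>> manager = LocalManager([loc])
            >>> loc.request_id = 42
            >>> manager.cleanup()
            >>> hasattr(loc, 'request_id')
            False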
+ + """ + for local in self.locals: + release_local(local) + + def __repr__(self): + return '<{0} storages: {1}>'.format( + self.__class__.__name__, len(self.locals)) + + +class _FastLocalStack(threading.local): + + def __init__(self): + self.stack = [] + self.push = self.stack.append + self.pop = self.stack.pop + + @property + def top(self): + try: + return self.stack[-1] + except (AttributeError, IndexError): + return None + + def __len__(self): + return len(self.stack) + +if USE_FAST_LOCALS: # pragma: no cover + LocalStack = _FastLocalStack +else: + # - See #706 + # since each thread has its own greenlet we can just use those as + # identifiers for the context. If greenlets are not available we + # fall back to the current thread ident. + LocalStack = _LocalStack # noqa diff --git a/celery/utils/timer2.py b/celery/utils/timer2.py new file mode 100644 index 0000000..e42660c --- /dev/null +++ b/celery/utils/timer2.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +""" + timer2 + ~~~~~~ + + Scheduler for Python functions. + +""" +from __future__ import absolute_import + +import os +import sys +import threading + +from itertools import count +from time import sleep + +from celery.five import THREAD_TIMEOUT_MAX +from kombu.async.timer import Entry, Timer as Schedule, to_timestamp, logger + +TIMER_DEBUG = os.environ.get('TIMER_DEBUG') + +__all__ = ['Entry', 'Schedule', 'Timer', 'to_timestamp'] + + +class Timer(threading.Thread): + Entry = Entry + Schedule = Schedule + + running = False + on_tick = None + _timer_count = count(1) + + if TIMER_DEBUG: # pragma: no cover + def start(self, *args, **kwargs): + import traceback + print('- Timer starting') + traceback.print_stack() + super(Timer, self).start(*args, **kwargs) + + def __init__(self, schedule=None, on_error=None, on_tick=None, + on_start=None, max_interval=None, **kwargs): + self.schedule = schedule or self.Schedule(on_error=on_error, + max_interval=max_interval) + self.on_start = on_start + self.on_tick = on_tick or self.on_tick + threading.Thread.__init__(self) + self._is_shutdown = threading.Event() + self._is_stopped = threading.Event() + self.mutex = threading.Lock() + self.not_empty = threading.Condition(self.mutex) + self.daemon = True + self.name = 'Timer-{0}'.format(next(self._timer_count)) + + def _next_entry(self): + with self.not_empty: + delay, entry = next(self.scheduler) + if entry is None: + if delay is None: + self.not_empty.wait(1.0) + return delay + return self.schedule.apply_entry(entry) + __next__ = next = _next_entry # for 2to3 + + def run(self): + try: + self.running = True + self.scheduler = iter(self.schedule) + + while not self._is_shutdown.isSet(): + delay = self._next_entry() + if delay: + if self.on_tick: + self.on_tick(delay) + if sleep is None: # pragma: no cover + break + sleep(delay) + try: + self._is_stopped.set() + except TypeError: # pragma: no cover + # we lost the race at interpreter shutdown, + # so gc collected built-in modules. 
+ pass + except Exception as exc: + logger.error('Thread Timer crashed: %r', exc, exc_info=True) + os._exit(1) + + def stop(self): + self._is_shutdown.set() + if self.running: + self._is_stopped.wait() + self.join(THREAD_TIMEOUT_MAX) + self.running = False + + def ensure_started(self): + if not self.running and not self.isAlive(): + if self.on_start: + self.on_start(self) + self.start() + + def _do_enter(self, meth, *args, **kwargs): + self.ensure_started() + with self.mutex: + entry = getattr(self.schedule, meth)(*args, **kwargs) + self.not_empty.notify() + return entry + + def enter(self, entry, eta, priority=None): + return self._do_enter('enter_at', entry, eta, priority=priority) + + def call_at(self, *args, **kwargs): + return self._do_enter('call_at', *args, **kwargs) + + def enter_after(self, *args, **kwargs): + return self._do_enter('enter_after', *args, **kwargs) + + def call_after(self, *args, **kwargs): + return self._do_enter('call_after', *args, **kwargs) + + def call_repeatedly(self, *args, **kwargs): + return self._do_enter('call_repeatedly', *args, **kwargs) + + def exit_after(self, secs, priority=10): + self.call_after(secs, sys.exit, priority) + + def cancel(self, tref): + tref.cancel() + + def clear(self): + self.schedule.clear() + + def empty(self): + return not len(self) + + def __len__(self): + return len(self.schedule) + + def __bool__(self): + return True + __nonzero__ = __bool__ + + @property + def queue(self): + return self.schedule.queue diff --git a/celery/utils/timeutils.py b/celery/utils/timeutils.py new file mode 100644 index 0000000..3ec4014 --- /dev/null +++ b/celery/utils/timeutils.py @@ -0,0 +1,345 @@ +# -*- coding: utf-8 -*- +""" + celery.utils.timeutils + ~~~~~~~~~~~~~~~~~~~~~~ + + This module contains various utilities related to dates and times. + +""" +from __future__ import absolute_import + +import numbers +import os +import time as _time + +from calendar import monthrange +from datetime import date, datetime, timedelta, tzinfo + +from kombu.utils import cached_property, reprcall +from kombu.utils.compat import timedelta_seconds + +from pytz import timezone as _timezone, AmbiguousTimeError + +from celery.five import string_t + +from .functional import dictfilter +from .iso8601 import parse_iso8601 +from .text import pluralize + +__all__ = ['LocalTimezone', 'timezone', 'maybe_timedelta', 'timedelta_seconds', + 'delta_resolution', 'remaining', 'rate', 'weekday', + 'humanize_seconds', 'maybe_iso8601', 'is_naive', 'make_aware', + 'localize', 'to_utc', 'maybe_make_aware', 'ffwd', 'utcoffset', + 'adjust_timestamp', 'maybe_s_to_ms'] + +C_REMDEBUG = os.environ.get('C_REMDEBUG', False) + +DAYNAMES = 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat' +WEEKDAYS = dict(zip(DAYNAMES, range(7))) + +RATE_MODIFIER_MAP = {'s': lambda n: n, + 'm': lambda n: n / 60.0, + 'h': lambda n: n / 60.0 / 60.0} + +TIME_UNITS = (('day', 60 * 60 * 24.0, lambda n: format(n, '.2f')), + ('hour', 60 * 60.0, lambda n: format(n, '.2f')), + ('minute', 60.0, lambda n: format(n, '.2f')), + ('second', 1.0, lambda n: format(n, '.2f'))) + +ZERO = timedelta(0) + +_local_timezone = None + + +class LocalTimezone(tzinfo): + """Local time implementation taken from Python's docs. + + Used only when UTC is not enabled. + """ + + def __init__(self): + # This code is moved in __init__ to execute it as late as possible + # See get_default_timezone(). 
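+        # (The offsets below are read from the ``time`` module lazily, so they
+        # reflect the process timezone at first use rather than at import time.)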
+ self.STDOFFSET = timedelta(seconds=-_time.timezone) + if _time.daylight: + self.DSTOFFSET = timedelta(seconds=-_time.altzone) + else: + self.DSTOFFSET = self.STDOFFSET + self.DSTDIFF = self.DSTOFFSET - self.STDOFFSET + tzinfo.__init__(self) + + def __repr__(self): + return '' + + def utcoffset(self, dt): + if self._isdst(dt): + return self.DSTOFFSET + else: + return self.STDOFFSET + + def dst(self, dt): + if self._isdst(dt): + return self.DSTDIFF + else: + return ZERO + + def tzname(self, dt): + return _time.tzname[self._isdst(dt)] + + def _isdst(self, dt): + tt = (dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, + dt.weekday(), 0, 0) + stamp = _time.mktime(tt) + tt = _time.localtime(stamp) + return tt.tm_isdst > 0 + + +class _Zone(object): + + def tz_or_local(self, tzinfo=None): + if tzinfo is None: + return self.local + return self.get_timezone(tzinfo) + + def to_local(self, dt, local=None, orig=None): + if is_naive(dt): + dt = make_aware(dt, orig or self.utc) + return localize(dt, self.tz_or_local(local)) + + def to_system(self, dt): + return localize(dt, self.local) + + def to_local_fallback(self, dt): + if is_naive(dt): + return make_aware(dt, self.local) + return localize(dt, self.local) + + def get_timezone(self, zone): + if isinstance(zone, string_t): + return _timezone(zone) + return zone + + @cached_property + def local(self): + return LocalTimezone() + + @cached_property + def utc(self): + return self.get_timezone('UTC') +timezone = _Zone() + + +def maybe_timedelta(delta): + """Coerces integer to timedelta if `delta` is an integer.""" + if isinstance(delta, numbers.Real): + return timedelta(seconds=delta) + return delta + + +def delta_resolution(dt, delta): + """Round a datetime to the resolution of a timedelta. + + If the timedelta is in days, the datetime will be rounded + to the nearest days, if the timedelta is in hours the datetime + will be rounded to the nearest hour, and so on until seconds + which will just return the original datetime. + + """ + delta = timedelta_seconds(delta) + + resolutions = ((3, lambda x: x / 86400), + (4, lambda x: x / 3600), + (5, lambda x: x / 60)) + + args = dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second + for res, predicate in resolutions: + if predicate(delta) >= 1.0: + return datetime(*args[:res], tzinfo=dt.tzinfo) + return dt + + +def remaining(start, ends_in, now=None, relative=False): + """Calculate the remaining time for a start date and a timedelta. + + e.g. "how many seconds left for 30 seconds after start?" + + :param start: Start :class:`~datetime.datetime`. + :param ends_in: The end delta as a :class:`~datetime.timedelta`. + :keyword relative: If enabled the end time will be + calculated using :func:`delta_resolution` (i.e. rounded to the + resolution of `ends_in`). + :keyword now: Function returning the current time and date, + defaults to :func:`datetime.utcnow`. 
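+
+    Example (illustrative)::
+
+        >>> from datetime import datetime, timedelta
+        >>> start = datetime(2014, 1, 1, 12, 0, 0)
+        >>> remaining(start, timedelta(hours=1),
+        ...           now=datetime(2014, 1, 1, 12, 30, 0))
+        datetime.timedelta(0, 1800)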
+ + """ + now = now or datetime.utcnow() + end_date = start + ends_in + if relative: + end_date = delta_resolution(end_date, ends_in) + ret = end_date - now + if C_REMDEBUG: # pragma: no cover + print('rem: NOW:%r START:%r ENDS_IN:%r END_DATE:%s REM:%s' % ( + now, start, ends_in, end_date, ret)) + return ret + + +def rate(rate): + """Parse rate strings, such as `"100/m"`, `"2/h"` or `"0.5/s"` + and convert them to seconds.""" + if rate: + if isinstance(rate, string_t): + ops, _, modifier = rate.partition('/') + return RATE_MODIFIER_MAP[modifier or 's'](float(ops)) or 0 + return rate or 0 + return 0 + + +def weekday(name): + """Return the position of a weekday (0 - 7, where 0 is Sunday). + + Example:: + + >>> weekday('sunday'), weekday('sun'), weekday('mon') + (0, 0, 1) + + """ + abbreviation = name[0:3].lower() + try: + return WEEKDAYS[abbreviation] + except KeyError: + # Show original day name in exception, instead of abbr. + raise KeyError(name) + + +def humanize_seconds(secs, prefix='', sep='', now='now'): + """Show seconds in human form, e.g. 60 is "1 minute", 7200 is "2 + hours". + + :keyword prefix: Can be used to add a preposition to the output, + e.g. 'in' will give 'in 1 second', but add nothing to 'now'. + + """ + secs = float(secs) + for unit, divider, formatter in TIME_UNITS: + if secs >= divider: + w = secs / divider + return '{0}{1}{2} {3}'.format(prefix, sep, formatter(w), + pluralize(w, unit)) + return now + + +def maybe_iso8601(dt): + """`Either datetime | str -> datetime or None -> None`""" + if not dt: + return + if isinstance(dt, datetime): + return dt + return parse_iso8601(dt) + + +def is_naive(dt): + """Return :const:`True` if the datetime is naive + (does not have timezone information).""" + return dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None + + +def make_aware(dt, tz): + """Sets the timezone for a datetime object.""" + try: + _localize = tz.localize + except AttributeError: + return dt.replace(tzinfo=tz) + else: + # works on pytz timezones + try: + return _localize(dt, is_dst=None) + except AmbiguousTimeError: + return min(_localize(dt, is_dst=True), + _localize(dt, is_dst=False)) + + +def localize(dt, tz): + """Convert aware datetime to another timezone.""" + dt = dt.astimezone(tz) + try: + _normalize = tz.normalize + except AttributeError: # non-pytz tz + return dt + else: + try: + return _normalize(dt, is_dst=None) + except TypeError: + return _normalize(dt) + except AmbiguousTimeError: + return min(_normalize(dt, is_dst=True), + _normalize(dt, is_dst=False)) + + +def to_utc(dt): + """Converts naive datetime to UTC""" + return make_aware(dt, timezone.utc) + + +def maybe_make_aware(dt, tz=None): + if is_naive(dt): + dt = to_utc(dt) + return localize( + dt, timezone.utc if tz is None else timezone.tz_or_local(tz), + ) + + +class ffwd(object): + """Version of relativedelta that only supports addition.""" + + def __init__(self, year=None, month=None, weeks=0, weekday=None, day=None, + hour=None, minute=None, second=None, microsecond=None, + **kwargs): + self.year = year + self.month = month + self.weeks = weeks + self.weekday = weekday + self.day = day + self.hour = hour + self.minute = minute + self.second = second + self.microsecond = microsecond + self.days = weeks * 7 + self._has_time = self.hour is not None or self.minute is not None + + def __repr__(self): + return reprcall('ffwd', (), self._fields(weeks=self.weeks, + weekday=self.weekday)) + + def __radd__(self, other): + if not isinstance(other, date): + return NotImplemented + year = self.year or 
other.year + month = self.month or other.month + day = min(monthrange(year, month)[1], self.day or other.day) + ret = other.replace(**dict(dictfilter(self._fields()), + year=year, month=month, day=day)) + if self.weekday is not None: + ret += timedelta(days=(7 - ret.weekday() + self.weekday) % 7) + return ret + timedelta(days=self.days) + + def _fields(self, **extra): + return dictfilter({ + 'year': self.year, 'month': self.month, 'day': self.day, + 'hour': self.hour, 'minute': self.minute, + 'second': self.second, 'microsecond': self.microsecond, + }, **extra) + + +def utcoffset(time=_time): + if time.daylight: + return time.altzone // 3600 + return time.timezone // 3600 + + +def adjust_timestamp(ts, offset, here=utcoffset): + return ts - (offset - here()) * 3600 + + +def maybe_s_to_ms(v): + return int(float(v) * 1000.0) if v is not None else v diff --git a/celery/worker/__init__.py b/celery/worker/__init__.py new file mode 100644 index 0000000..217902d --- /dev/null +++ b/celery/worker/__init__.py @@ -0,0 +1,388 @@ +# -*- coding: utf-8 -*- +""" + celery.worker + ~~~~~~~~~~~~~ + + :class:`WorkController` can be used to instantiate in-process workers. + + The worker consists of several components, all managed by bootsteps + (mod:`celery.bootsteps`). + +""" +from __future__ import absolute_import + +import os +import sys +import traceback +try: + import resource +except ImportError: # pragma: no cover + resource = None # noqa + +from billiard import cpu_count +from billiard.util import Finalize +from kombu.syn import detect_environment + +from celery import bootsteps +from celery.bootsteps import RUN, TERMINATE +from celery import concurrency as _concurrency +from celery import platforms +from celery import signals +from celery.exceptions import ( + ImproperlyConfigured, WorkerTerminate, TaskRevokedError, +) +from celery.five import string_t, values +from celery.utils import default_nodename, worker_direct +from celery.utils.imports import reload_from_cwd +from celery.utils.log import mlevel, worker_logger as logger +from celery.utils.threads import default_socket_timeout + +from . import state + +__all__ = ['WorkController', 'default_nodename'] + +#: Default socket timeout at shutdown. +SHUTDOWN_SOCKET_TIMEOUT = 5.0 + +SELECT_UNKNOWN_QUEUE = """\ +Trying to select queue subset of {0!r}, but queue {1} is not +defined in the CELERY_QUEUES setting. + +If you want to automatically declare unknown queues you can +enable the CELERY_CREATE_MISSING_QUEUES setting. +""" + +DESELECT_UNKNOWN_QUEUE = """\ +Trying to deselect queue subset of {0!r}, but queue {1} is not +defined in the CELERY_QUEUES setting. 
+""" + + +def str_to_list(s): + if isinstance(s, string_t): + return s.split(',') + return s + + +class WorkController(object): + """Unmanaged worker instance.""" + app = None + + pidlock = None + blueprint = None + pool = None + semaphore = None + + class Blueprint(bootsteps.Blueprint): + """Worker bootstep blueprint.""" + name = 'Worker' + default_steps = set([ + 'celery.worker.components:Hub', + 'celery.worker.components:Queues', + 'celery.worker.components:Pool', + 'celery.worker.components:Beat', + 'celery.worker.components:Timer', + 'celery.worker.components:StateDB', + 'celery.worker.components:Consumer', + 'celery.worker.autoscale:WorkerComponent', + 'celery.worker.autoreload:WorkerComponent', + + ]) + + def __init__(self, app=None, hostname=None, **kwargs): + self.app = app or self.app + self.hostname = default_nodename(hostname) + self.app.loader.init_worker() + self.on_before_init(**kwargs) + self.setup_defaults(**kwargs) + self.on_after_init(**kwargs) + + self.setup_instance(**self.prepare_args(**kwargs)) + self._finalize = [ + Finalize(self, self._send_worker_shutdown, exitpriority=10), + ] + + def setup_instance(self, queues=None, ready_callback=None, pidfile=None, + include=None, use_eventloop=None, exclude_queues=None, + **kwargs): + self.pidfile = pidfile + self.setup_queues(queues, exclude_queues) + self.setup_includes(str_to_list(include)) + + # Set default concurrency + if not self.concurrency: + try: + self.concurrency = cpu_count() + except NotImplementedError: + self.concurrency = 2 + + # Options + self.loglevel = mlevel(self.loglevel) + self.ready_callback = ready_callback or self.on_consumer_ready + + # this connection is not established, only used for params + self._conninfo = self.app.connection() + self.use_eventloop = ( + self.should_use_eventloop() if use_eventloop is None + else use_eventloop + ) + self.options = kwargs + + signals.worker_init.send(sender=self) + + # Initialize bootsteps + self.pool_cls = _concurrency.get_implementation(self.pool_cls) + self.steps = [] + self.on_init_blueprint() + self.blueprint = self.Blueprint(app=self.app, + on_start=self.on_start, + on_close=self.on_close, + on_stopped=self.on_stopped) + self.blueprint.apply(self, **kwargs) + + def on_init_blueprint(self): + pass + + def on_before_init(self, **kwargs): + pass + + def on_after_init(self, **kwargs): + pass + + def on_start(self): + if self.pidfile: + self.pidlock = platforms.create_pidlock(self.pidfile) + + def on_consumer_ready(self, consumer): + pass + + def on_close(self): + self.app.loader.shutdown_worker() + + def on_stopped(self): + self.timer.stop() + self.consumer.shutdown() + + if self.pidlock: + self.pidlock.release() + + def setup_queues(self, include, exclude=None): + include = str_to_list(include) + exclude = str_to_list(exclude) + try: + self.app.amqp.queues.select(include) + except KeyError as exc: + raise ImproperlyConfigured( + SELECT_UNKNOWN_QUEUE.format(include, exc)) + try: + self.app.amqp.queues.deselect(exclude) + except KeyError as exc: + raise ImproperlyConfigured( + DESELECT_UNKNOWN_QUEUE.format(exclude, exc)) + if self.app.conf.CELERY_WORKER_DIRECT: + self.app.amqp.queues.select_add(worker_direct(self.hostname)) + + def setup_includes(self, includes): + # Update celery_include to have all known task modules, so that we + # ensure all task modules are imported in case an execv happens. 
+ prev = tuple(self.app.conf.CELERY_INCLUDE) + if includes: + prev += tuple(includes) + [self.app.loader.import_task_module(m) for m in includes] + self.include = includes + task_modules = set(task.__class__.__module__ + for task in values(self.app.tasks)) + self.app.conf.CELERY_INCLUDE = tuple(set(prev) | task_modules) + + def prepare_args(self, **kwargs): + return kwargs + + def _send_worker_shutdown(self): + signals.worker_shutdown.send(sender=self) + + def start(self): + """Starts the workers main loop.""" + try: + self.blueprint.start(self) + except WorkerTerminate: + self.terminate() + except Exception as exc: + logger.error('Unrecoverable error: %r', exc, exc_info=True) + self.stop() + except (KeyboardInterrupt, SystemExit): + self.stop() + + def register_with_event_loop(self, hub): + self.blueprint.send_all( + self, 'register_with_event_loop', args=(hub, ), + description='hub.register', + ) + + def _process_task_sem(self, req): + return self._quick_acquire(self._process_task, req) + + def _process_task(self, req): + """Process task by sending it to the pool of workers.""" + try: + req.execute_using_pool(self.pool) + except TaskRevokedError: + try: + self._quick_release() # Issue 877 + except AttributeError: + pass + except Exception as exc: + logger.critical('Internal error: %r\n%s', + exc, traceback.format_exc(), exc_info=True) + + def signal_consumer_close(self): + try: + self.consumer.close() + except AttributeError: + pass + + def should_use_eventloop(self): + return (detect_environment() == 'default' and + self._conninfo.is_evented and not self.app.IS_WINDOWS) + + def stop(self, in_sighandler=False): + """Graceful shutdown of the worker server.""" + if self.blueprint.state == RUN: + self.signal_consumer_close() + if not in_sighandler or self.pool.signal_safe: + self._shutdown(warm=True) + + def terminate(self, in_sighandler=False): + """Not so graceful shutdown of the worker server.""" + if self.blueprint.state != TERMINATE: + self.signal_consumer_close() + if not in_sighandler or self.pool.signal_safe: + self._shutdown(warm=False) + + def _shutdown(self, warm=True): + # if blueprint does not exist it means that we had an + # error before the bootsteps could be initialized. 
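+        # (A warm shutdown stops the bootsteps gracefully; ``terminate=True``
+        # is passed below when a cold shutdown was requested.)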
+ if self.blueprint is not None: + with default_socket_timeout(SHUTDOWN_SOCKET_TIMEOUT): # Issue 975 + self.blueprint.stop(self, terminate=not warm) + self.blueprint.join() + + def reload(self, modules=None, reload=False, reloader=None): + modules = self.app.loader.task_modules if modules is None else modules + imp = self.app.loader.import_from_cwd + + for module in set(modules or ()): + if module not in sys.modules: + logger.debug('importing module %s', module) + imp(module) + elif reload: + logger.debug('reloading module %s', module) + reload_from_cwd(sys.modules[module], reloader) + + if self.consumer: + self.consumer.update_strategies() + self.consumer.reset_rate_limits() + self.pool.restart() + + def info(self): + return {'total': self.state.total_count, + 'pid': os.getpid(), + 'clock': str(self.app.clock)} + + def rusage(self): + if resource is None: + raise NotImplementedError('rusage not supported by this platform') + s = resource.getrusage(resource.RUSAGE_SELF) + return { + 'utime': s.ru_utime, + 'stime': s.ru_stime, + 'maxrss': s.ru_maxrss, + 'ixrss': s.ru_ixrss, + 'idrss': s.ru_idrss, + 'isrss': s.ru_isrss, + 'minflt': s.ru_minflt, + 'majflt': s.ru_majflt, + 'nswap': s.ru_nswap, + 'inblock': s.ru_inblock, + 'oublock': s.ru_oublock, + 'msgsnd': s.ru_msgsnd, + 'msgrcv': s.ru_msgrcv, + 'nsignals': s.ru_nsignals, + 'nvcsw': s.ru_nvcsw, + 'nivcsw': s.ru_nivcsw, + } + + def stats(self): + info = self.info() + info.update(self.blueprint.info(self)) + info.update(self.consumer.blueprint.info(self.consumer)) + try: + info['rusage'] = self.rusage() + except NotImplementedError: + info['rusage'] = 'N/A' + return info + + def __repr__(self): + return ''.format( + self=self, state=self.blueprint.human_state(), + ) + + def __str__(self): + return self.hostname + + @property + def state(self): + return state + + def setup_defaults(self, concurrency=None, loglevel=None, logfile=None, + send_events=None, pool_cls=None, consumer_cls=None, + timer_cls=None, timer_precision=None, + autoscaler_cls=None, autoreloader_cls=None, + pool_putlocks=None, pool_restarts=None, + force_execv=None, state_db=None, + schedule_filename=None, scheduler_cls=None, + task_time_limit=None, task_soft_time_limit=None, + max_tasks_per_child=None, prefetch_multiplier=None, + disable_rate_limits=None, worker_lost_wait=None, **_kw): + self.concurrency = self._getopt('concurrency', concurrency) + self.loglevel = self._getopt('log_level', loglevel) + self.logfile = self._getopt('log_file', logfile) + self.send_events = self._getopt('send_events', send_events) + self.pool_cls = self._getopt('pool', pool_cls) + self.consumer_cls = self._getopt('consumer', consumer_cls) + self.timer_cls = self._getopt('timer', timer_cls) + self.timer_precision = self._getopt('timer_precision', timer_precision) + self.autoscaler_cls = self._getopt('autoscaler', autoscaler_cls) + self.autoreloader_cls = self._getopt('autoreloader', autoreloader_cls) + self.pool_putlocks = self._getopt('pool_putlocks', pool_putlocks) + self.pool_restarts = self._getopt('pool_restarts', pool_restarts) + self.force_execv = self._getopt('force_execv', force_execv) + self.state_db = self._getopt('state_db', state_db) + self.schedule_filename = self._getopt( + 'schedule_filename', schedule_filename, + ) + self.scheduler_cls = self._getopt( + 'celerybeat_scheduler', scheduler_cls, + ) + self.task_time_limit = self._getopt( + 'task_time_limit', task_time_limit, + ) + self.task_soft_time_limit = self._getopt( + 'task_soft_time_limit', task_soft_time_limit, + ) + 
self.max_tasks_per_child = self._getopt( + 'max_tasks_per_child', max_tasks_per_child, + ) + self.prefetch_multiplier = int(self._getopt( + 'prefetch_multiplier', prefetch_multiplier, + )) + self.disable_rate_limits = self._getopt( + 'disable_rate_limits', disable_rate_limits, + ) + self.worker_lost_wait = self._getopt( + 'worker_lost_wait', worker_lost_wait, + ) + + def _getopt(self, key, value): + if value is not None: + return value + return self.app.conf.find_value_for_key(key, namespace='celeryd') diff --git a/celery/worker/autoreload.py b/celery/worker/autoreload.py new file mode 100644 index 0000000..8ade32f --- /dev/null +++ b/celery/worker/autoreload.py @@ -0,0 +1,302 @@ +# -*- coding: utf-8 -*- +""" + celery.worker.autoreload + ~~~~~~~~~~~~~~~~~~~~~~~~ + + This module implements automatic module reloading +""" +from __future__ import absolute_import + +import hashlib +import os +import select +import sys +import time + +from collections import defaultdict +from threading import Event + +from kombu.utils import eventio +from kombu.utils.encoding import ensure_bytes + +from celery import bootsteps +from celery.five import items +from celery.platforms import ignore_errno +from celery.utils.imports import module_file +from celery.utils.log import get_logger +from celery.utils.threads import bgThread + +from .components import Pool + +try: # pragma: no cover + import pyinotify + _ProcessEvent = pyinotify.ProcessEvent +except ImportError: # pragma: no cover + pyinotify = None # noqa + _ProcessEvent = object # noqa + +__all__ = [ + 'WorkerComponent', 'Autoreloader', 'Monitor', 'BaseMonitor', + 'StatMonitor', 'KQueueMonitor', 'InotifyMonitor', 'file_hash', +] + +logger = get_logger(__name__) + + +class WorkerComponent(bootsteps.StartStopStep): + label = 'Autoreloader' + conditional = True + requires = (Pool, ) + + def __init__(self, w, autoreload=None, **kwargs): + self.enabled = w.autoreload = autoreload + w.autoreloader = None + + def create(self, w): + w.autoreloader = self.instantiate(w.autoreloader_cls, w) + return w.autoreloader if not w.use_eventloop else None + + def register_with_event_loop(self, w, hub): + w.autoreloader.register_with_event_loop(hub) + hub.on_close.add(w.autoreloader.on_event_loop_close) + + +def file_hash(filename, algorithm='md5'): + hobj = hashlib.new(algorithm) + with open(filename, 'rb') as f: + for chunk in iter(lambda: f.read(2 ** 20), ''): + hobj.update(ensure_bytes(chunk)) + return hobj.digest() + + +class BaseMonitor(object): + + def __init__(self, files, + on_change=None, shutdown_event=None, interval=0.5): + self.files = files + self.interval = interval + self._on_change = on_change + self.modify_times = defaultdict(int) + self.shutdown_event = shutdown_event or Event() + + def start(self): + raise NotImplementedError('Subclass responsibility') + + def stop(self): + pass + + def on_change(self, modified): + if self._on_change: + return self._on_change(modified) + + def on_event_loop_close(self, hub): + pass + + +class StatMonitor(BaseMonitor): + """File change monitor based on the ``stat`` system call.""" + + def _mtimes(self): + return ((f, self._mtime(f)) for f in self.files) + + def _maybe_modified(self, f, mt): + return mt is not None and self.modify_times[f] != mt + + def register_with_event_loop(self, hub): + hub.call_repeatedly(2.0, self.find_changes) + + def find_changes(self): + maybe_modified = self._maybe_modified + modified = dict((f, mt) for f, mt in self._mtimes() + if maybe_modified(f, mt)) + if modified: + self.on_change(modified) + 
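+            # Note: the recorded mtimes are only updated after the change
+            # callback has returned.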
self.modify_times.update(modified) + + def start(self): + while not self.shutdown_event.is_set(): + self.find_changes() + time.sleep(self.interval) + + @staticmethod + def _mtime(path): + try: + return os.stat(path).st_mtime + except Exception: + pass + + +class KQueueMonitor(BaseMonitor): + """File change monitor based on BSD kernel event notifications""" + + def __init__(self, *args, **kwargs): + super(KQueueMonitor, self).__init__(*args, **kwargs) + self.filemap = dict((f, None) for f in self.files) + self.fdmap = {} + + def register_with_event_loop(self, hub): + if eventio.kqueue is not None: + self._kq = eventio._kqueue() + self.add_events(self._kq) + self._kq.on_file_change = self.handle_event + hub.add_reader(self._kq._kqueue, self._kq.poll, 0) + + def on_event_loop_close(self, hub): + self.close(self._kq) + + def add_events(self, poller): + for f in self.filemap: + self.filemap[f] = fd = os.open(f, os.O_RDONLY) + self.fdmap[fd] = f + poller.watch_file(fd) + + def handle_event(self, events): + self.on_change([self.fdmap[e.ident] for e in events]) + + def start(self): + self.poller = eventio.poll() + self.add_events(self.poller) + self.poller.on_file_change = self.handle_event + while not self.shutdown_event.is_set(): + self.poller.poll(1) + + def close(self, poller): + for f, fd in items(self.filemap): + if fd is not None: + poller.unregister(fd) + with ignore_errno('EBADF'): # pragma: no cover + os.close(fd) + self.filemap.clear() + self.fdmap.clear() + + def stop(self): + self.close(self.poller) + self.poller.close() + + +class InotifyMonitor(_ProcessEvent): + """File change monitor based on Linux kernel `inotify` subsystem""" + + def __init__(self, modules, on_change=None, **kwargs): + assert pyinotify + self._modules = modules + self._on_change = on_change + self._wm = None + self._notifier = None + + def register_with_event_loop(self, hub): + self.create_notifier() + hub.add_reader(self._wm.get_fd(), self.on_readable) + + def on_event_loop_close(self, hub): + pass + + def on_readable(self): + self._notifier.read_events() + self._notifier.process_events() + + def create_notifier(self): + self._wm = pyinotify.WatchManager() + self._notifier = pyinotify.Notifier(self._wm, self) + add_watch = self._wm.add_watch + flags = pyinotify.IN_MODIFY | pyinotify.IN_ATTRIB + for m in self._modules: + add_watch(m, flags) + + def start(self): + try: + self.create_notifier() + self._notifier.loop() + finally: + if self._wm: + self._wm.close() + # Notifier.close is called at the end of Notifier.loop + self._wm = self._notifier = None + + def stop(self): + pass + + def process_(self, event): + self.on_change([event.path]) + + process_IN_ATTRIB = process_IN_MODIFY = process_ + + def on_change(self, modified): + if self._on_change: + return self._on_change(modified) + + +def default_implementation(): + if hasattr(select, 'kqueue') and eventio.kqueue is not None: + return 'kqueue' + elif sys.platform.startswith('linux') and pyinotify: + return 'inotify' + else: + return 'stat' + +implementations = {'kqueue': KQueueMonitor, + 'inotify': InotifyMonitor, + 'stat': StatMonitor} +Monitor = implementations[ + os.environ.get('CELERYD_FSNOTIFY') or default_implementation()] + + +class Autoreloader(bgThread): + """Tracks changes in modules and fires reload commands""" + Monitor = Monitor + + def __init__(self, controller, modules=None, monitor_cls=None, **options): + super(Autoreloader, self).__init__() + self.controller = controller + app = self.controller.app + self.modules = app.loader.task_modules if 
modules is None else modules + self.options = options + self._monitor = None + self._hashes = None + self.file_to_module = {} + + def on_init(self): + files = self.file_to_module + files.update(dict( + (module_file(sys.modules[m]), m) for m in self.modules)) + + self._monitor = self.Monitor( + files, self.on_change, + shutdown_event=self._is_shutdown, **self.options) + self._hashes = dict([(f, file_hash(f)) for f in files]) + + def register_with_event_loop(self, hub): + if self._monitor is None: + self.on_init() + self._monitor.register_with_event_loop(hub) + + def on_event_loop_close(self, hub): + if self._monitor is not None: + self._monitor.on_event_loop_close(hub) + + def body(self): + self.on_init() + with ignore_errno('EINTR', 'EAGAIN'): + self._monitor.start() + + def _maybe_modified(self, f): + if os.path.exists(f): + digest = file_hash(f) + if digest != self._hashes[f]: + self._hashes[f] = digest + return True + return False + + def on_change(self, files): + modified = [f for f in files if self._maybe_modified(f)] + if modified: + names = [self.file_to_module[module] for module in modified] + logger.info('Detected modified modules: %r', names) + self._reload(names) + + def _reload(self, modules): + self.controller.reload(modules, reload=True) + + def stop(self): + if self._monitor: + self._monitor.stop() diff --git a/celery/worker/autoscale.py b/celery/worker/autoscale.py new file mode 100644 index 0000000..14afc2e --- /dev/null +++ b/celery/worker/autoscale.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +""" + celery.worker.autoscale + ~~~~~~~~~~~~~~~~~~~~~~~ + + This module implements the internal thread responsible + for growing and shrinking the pool according to the + current autoscale settings. + + The autoscale thread is only enabled if :option:`--autoscale` + has been enabled on the command-line. + +""" +from __future__ import absolute_import + +import os +import threading + +from time import sleep + +from kombu.async.semaphore import DummyLock + +from celery import bootsteps +from celery.five import monotonic +from celery.utils.log import get_logger +from celery.utils.threads import bgThread + +from . import state +from .components import Pool + +__all__ = ['Autoscaler', 'WorkerComponent'] + +logger = get_logger(__name__) +debug, info, error = logger.debug, logger.info, logger.error + +AUTOSCALE_KEEPALIVE = float(os.environ.get('AUTOSCALE_KEEPALIVE', 30)) + + +class WorkerComponent(bootsteps.StartStopStep): + label = 'Autoscaler' + conditional = True + requires = (Pool, ) + + def __init__(self, w, **kwargs): + self.enabled = w.autoscale + w.autoscaler = None + + def create(self, w): + scaler = w.autoscaler = self.instantiate( + w.autoscaler_cls, + w.pool, w.max_concurrency, w.min_concurrency, + worker=w, mutex=DummyLock() if w.use_eventloop else None, + ) + return scaler if not w.use_eventloop else None + + def register_with_event_loop(self, w, hub): + w.consumer.on_task_message.add(w.autoscaler.maybe_scale) + hub.call_repeatedly( + w.autoscaler.keepalive, w.autoscaler.maybe_scale, + ) + + +class Autoscaler(bgThread): + + def __init__(self, pool, max_concurrency, + min_concurrency=0, worker=None, + keepalive=AUTOSCALE_KEEPALIVE, mutex=None): + super(Autoscaler, self).__init__() + self.pool = pool + self.mutex = mutex or threading.Lock() + self.max_concurrency = max_concurrency + self.min_concurrency = min_concurrency + self.keepalive = keepalive + self._last_action = None + self.worker = worker + + assert self.keepalive, 'cannot scale down too fast.' 
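+        # ``keepalive`` also acts as the minimum number of seconds between
+        # scale-down operations (see ``scale_down`` below), hence the assert.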
+ + def body(self): + with self.mutex: + self.maybe_scale() + sleep(1.0) + + def _maybe_scale(self): + procs = self.processes + cur = min(self.qty, self.max_concurrency) + if cur > procs: + self.scale_up(cur - procs) + return True + elif cur < procs: + self.scale_down((procs - cur) - self.min_concurrency) + return True + + def maybe_scale(self): + if self._maybe_scale(): + self.pool.maintain_pool() + + def update(self, max=None, min=None): + with self.mutex: + if max is not None: + if max < self.max_concurrency: + self._shrink(self.processes - max) + self.max_concurrency = max + if min is not None: + if min > self.min_concurrency: + self._grow(min - self.min_concurrency) + self.min_concurrency = min + return self.max_concurrency, self.min_concurrency + + def force_scale_up(self, n): + with self.mutex: + new = self.processes + n + if new > self.max_concurrency: + self.max_concurrency = new + self.min_concurrency += 1 + self._grow(n) + + def force_scale_down(self, n): + with self.mutex: + new = self.processes - n + if new < self.min_concurrency: + self.min_concurrency = max(new, 0) + self._shrink(min(n, self.processes)) + + def scale_up(self, n): + self._last_action = monotonic() + return self._grow(n) + + def scale_down(self, n): + if n and self._last_action and ( + monotonic() - self._last_action > self.keepalive): + self._last_action = monotonic() + return self._shrink(n) + + def _grow(self, n): + info('Scaling up %s processes.', n) + self.pool.grow(n) + self.worker.consumer._update_prefetch_count(n) + + def _shrink(self, n): + info('Scaling down %s processes.', n) + try: + self.pool.shrink(n) + except ValueError: + debug("Autoscaler won't scale down: all processes busy.") + except Exception as exc: + error('Autoscaler: scale_down: %r', exc, exc_info=True) + self.worker.consumer._update_prefetch_count(-n) + + def info(self): + return {'max': self.max_concurrency, + 'min': self.min_concurrency, + 'current': self.processes, + 'qty': self.qty} + + @property + def qty(self): + return len(state.reserved_requests) + + @property + def processes(self): + return self.pool.num_processes diff --git a/celery/worker/components.py b/celery/worker/components.py new file mode 100644 index 0000000..d23a3b6 --- /dev/null +++ b/celery/worker/components.py @@ -0,0 +1,247 @@ +# -*- coding: utf-8 -*- +""" + celery.worker.components + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Default worker bootsteps. + +""" +from __future__ import absolute_import + +import atexit +import warnings + +from kombu.async import Hub as _Hub, get_event_loop, set_event_loop +from kombu.async.semaphore import DummyLock, LaxBoundedSemaphore +from kombu.async.timer import Timer as _Timer + +from celery import bootsteps +from celery._state import _set_task_join_will_block +from celery.exceptions import ImproperlyConfigured +from celery.five import string_t +from celery.utils.log import worker_logger as logger + +__all__ = ['Timer', 'Hub', 'Queues', 'Pool', 'Beat', 'StateDB', 'Consumer'] + +ERR_B_GREEN = """\ +-B option doesn't work with eventlet/gevent pools: \ +use standalone beat instead.\ +""" + +W_POOL_SETTING = """ +The CELERYD_POOL setting should not be used to select the eventlet/gevent +pools, instead you *must use the -P* argument so that patches are applied +as early as possible. +""" + + +class Timer(bootsteps.Step): + """This step initializes the internal timer used by the worker.""" + + def create(self, w): + if w.use_eventloop: + # does not use dedicated timer thread. 
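+            # (the timer is driven by the event loop hub instead)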
+ w.timer = _Timer(max_interval=10.0) + else: + if not w.timer_cls: + # Default Timer is set by the pool, as e.g. eventlet + # needs a custom implementation. + w.timer_cls = w.pool_cls.Timer + w.timer = self.instantiate(w.timer_cls, + max_interval=w.timer_precision, + on_timer_error=self.on_timer_error, + on_timer_tick=self.on_timer_tick) + + def on_timer_error(self, exc): + logger.error('Timer error: %r', exc, exc_info=True) + + def on_timer_tick(self, delay): + logger.debug('Timer wake-up! Next eta %s secs.', delay) + + +class Hub(bootsteps.StartStopStep): + requires = (Timer, ) + + def __init__(self, w, **kwargs): + w.hub = None + + def include_if(self, w): + return w.use_eventloop + + def create(self, w): + w.hub = get_event_loop() + if w.hub is None: + w.hub = set_event_loop(_Hub(w.timer)) + self._patch_thread_primitives(w) + return self + + def start(self, w): + pass + + def stop(self, w): + w.hub.close() + + def terminate(self, w): + w.hub.close() + + def _patch_thread_primitives(self, w): + # make clock use dummy lock + w.app.clock.mutex = DummyLock() + # multiprocessing's ApplyResult uses this lock. + try: + from billiard import pool + except ImportError: + pass + else: + pool.Lock = DummyLock + + +class Queues(bootsteps.Step): + """This bootstep initializes the internal queues + used by the worker.""" + label = 'Queues (intra)' + requires = (Hub, ) + + def create(self, w): + w.process_task = w._process_task + if w.use_eventloop: + if w.pool_putlocks and w.pool_cls.uses_semaphore: + w.process_task = w._process_task_sem + + +class Pool(bootsteps.StartStopStep): + """Bootstep managing the worker pool. + + Describes how to initialize the worker pool, and starts and stops + the pool during worker startup/shutdown. + + Adds attributes: + + * autoscale + * pool + * max_concurrency + * min_concurrency + + """ + requires = (Queues, ) + + def __init__(self, w, autoscale=None, autoreload=None, + no_execv=False, optimization=None, **kwargs): + if isinstance(autoscale, string_t): + max_c, _, min_c = autoscale.partition(',') + autoscale = [int(max_c), min_c and int(min_c) or 0] + w.autoscale = autoscale + w.pool = None + w.max_concurrency = None + w.min_concurrency = w.concurrency + w.no_execv = no_execv + if w.autoscale: + w.max_concurrency, w.min_concurrency = w.autoscale + self.autoreload_enabled = autoreload + self.optimization = optimization + + def close(self, w): + if w.pool: + w.pool.close() + + def terminate(self, w): + if w.pool: + w.pool.terminate() + + def create(self, w, semaphore=None, max_restarts=None): + if w.app.conf.CELERYD_POOL in ('eventlet', 'gevent'): + warnings.warn(UserWarning(W_POOL_SETTING)) + threaded = not w.use_eventloop + procs = w.min_concurrency + forking_enable = w.no_execv if w.force_execv else True + if not threaded: + semaphore = w.semaphore = LaxBoundedSemaphore(procs) + w._quick_acquire = w.semaphore.acquire + w._quick_release = w.semaphore.release + max_restarts = 100 + allow_restart = self.autoreload_enabled or w.pool_restarts + pool = w.pool = self.instantiate( + w.pool_cls, w.min_concurrency, + initargs=(w.app, w.hostname), + maxtasksperchild=w.max_tasks_per_child, + timeout=w.task_time_limit, + soft_timeout=w.task_soft_time_limit, + putlocks=w.pool_putlocks and threaded, + lost_worker_timeout=w.worker_lost_wait, + threads=threaded, + max_restarts=max_restarts, + allow_restart=allow_restart, + forking_enable=forking_enable, + semaphore=semaphore, + sched_strategy=self.optimization, + ) + _set_task_join_will_block(pool.task_join_will_block) + return 
pool + + def info(self, w): + return {'pool': w.pool.info if w.pool else 'N/A'} + + def register_with_event_loop(self, w, hub): + w.pool.register_with_event_loop(hub) + + +class Beat(bootsteps.StartStopStep): + """Step used to embed a beat process. + + This will only be enabled if the ``beat`` + argument is set. + + """ + label = 'Beat' + conditional = True + + def __init__(self, w, beat=False, **kwargs): + self.enabled = w.beat = beat + w.beat = None + + def create(self, w): + from celery.beat import EmbeddedService + if w.pool_cls.__module__.endswith(('gevent', 'eventlet')): + raise ImproperlyConfigured(ERR_B_GREEN) + b = w.beat = EmbeddedService(app=w.app, + schedule_filename=w.schedule_filename, + scheduler_cls=w.scheduler_cls) + return b + + +class StateDB(bootsteps.Step): + """This bootstep sets up the workers state db if enabled.""" + + def __init__(self, w, **kwargs): + self.enabled = w.state_db + w._persistence = None + + def create(self, w): + w._persistence = w.state.Persistent(w.state, w.state_db, w.app.clock) + atexit.register(w._persistence.save) + + +class Consumer(bootsteps.StartStopStep): + last = True + + def create(self, w): + if w.max_concurrency: + prefetch_count = max(w.min_concurrency, 1) * w.prefetch_multiplier + else: + prefetch_count = w.concurrency * w.prefetch_multiplier + c = w.consumer = self.instantiate( + w.consumer_cls, w.process_task, + hostname=w.hostname, + send_events=w.send_events, + init_callback=w.ready_callback, + initial_prefetch_count=prefetch_count, + pool=w.pool, + timer=w.timer, + app=w.app, + controller=w, + hub=w.hub, + worker_options=w.options, + disable_rate_limits=w.disable_rate_limits, + prefetch_multiplier=w.prefetch_multiplier, + ) + return c diff --git a/celery/worker/consumer.py b/celery/worker/consumer.py new file mode 100644 index 0000000..b2b6f1b --- /dev/null +++ b/celery/worker/consumer.py @@ -0,0 +1,824 @@ +# -*- coding: utf-8 -*- +""" +celery.worker.consumer +~~~~~~~~~~~~~~~~~~~~~~ + +This module contains the components responsible for consuming messages +from the broker, processing the messages and keeping the broker connections +up and running. + +""" +from __future__ import absolute_import + +import errno +import kombu +import logging +import os +import socket + +from collections import defaultdict +from functools import partial +from heapq import heappush +from operator import itemgetter +from time import sleep + +from billiard.common import restart_state +from billiard.exceptions import RestartFreqExceeded +from kombu.async.semaphore import DummyLock +from kombu.common import QoS, ignore_errors +from kombu.syn import _detect_environment +from kombu.utils.compat import get_errno +from kombu.utils.encoding import safe_repr, bytes_t +from kombu.utils.limits import TokenBucket + +from celery import bootsteps +from celery.app.trace import build_tracer +from celery.canvas import signature +from celery.exceptions import InvalidTaskError +from celery.five import items, values +from celery.utils.functional import noop +from celery.utils.log import get_logger +from celery.utils.text import truncate +from celery.utils.timeutils import humanize_seconds, rate + +from . import heartbeat, loops, pidbox +from .state import task_reserved, maybe_shutdown, revoked, reserved_requests + +try: + buffer_t = buffer +except NameError: # pragma: no cover + # Py3 does not have buffer, but we only need isinstance. 
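+    # (This placeholder simply makes the isinstance() check in dump_body()
+    # a no-op on Python 3.)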
+ + class buffer_t(object): # noqa + pass + +__all__ = [ + 'Consumer', 'Connection', 'Events', 'Heart', 'Control', + 'Tasks', 'Evloop', 'Agent', 'Mingle', 'Gossip', 'dump_body', +] + +CLOSE = bootsteps.CLOSE +logger = get_logger(__name__) +debug, info, warn, error, crit = (logger.debug, logger.info, logger.warning, + logger.error, logger.critical) + +CONNECTION_RETRY = """\ +consumer: Connection to broker lost. \ +Trying to re-establish the connection...\ +""" + +CONNECTION_RETRY_STEP = """\ +Trying again {when}...\ +""" + +CONNECTION_ERROR = """\ +consumer: Cannot connect to %s: %s. +%s +""" + +CONNECTION_FAILOVER = """\ +Will retry using next failover.\ +""" + +UNKNOWN_FORMAT = """\ +Received and deleted unknown message. Wrong destination?!? + +The full contents of the message body was: %s +""" + +#: Error message for when an unregistered task is received. +UNKNOWN_TASK_ERROR = """\ +Received unregistered task of type %s. +The message has been ignored and discarded. + +Did you remember to import the module containing this task? +Or maybe you are using relative imports? +Please see http://bit.ly/gLye1c for more information. + +The full contents of the message body was: +%s +""" + +#: Error message for when an invalid task message is received. +INVALID_TASK_ERROR = """\ +Received invalid task message: %s +The message has been ignored and discarded. + +Please ensure your message conforms to the task +message protocol as described here: http://bit.ly/hYj41y + +The full contents of the message body was: +%s +""" + +MESSAGE_DECODE_ERROR = """\ +Can't decode message body: %r [type:%r encoding:%r headers:%s] + +body: %s +""" + +MESSAGE_REPORT = """\ +body: {0} +{{content_type:{1} content_encoding:{2} + delivery_info:{3} headers={4}}} +""" + +MINGLE_GET_FIELDS = itemgetter('clock', 'revoked') + + +def dump_body(m, body): + if isinstance(body, buffer_t): + body = bytes_t(body) + return '{0} ({1}b)'.format(truncate(safe_repr(body), 1024), + len(m.body)) + + +class Consumer(object): + Strategies = dict + + #: set when consumer is shutting down. + in_shutdown = False + + #: Optional callback called the first time the worker + #: is ready to receive tasks. + init_callback = None + + #: The current worker pool instance. + pool = None + + #: A timer used for high-priority internal tasks, such + #: as sending heartbeats. 
+ timer = None + + restart_count = -1 # first start is the same as a restart + + class Blueprint(bootsteps.Blueprint): + name = 'Consumer' + default_steps = [ + 'celery.worker.consumer:Connection', + 'celery.worker.consumer:Mingle', + 'celery.worker.consumer:Events', + 'celery.worker.consumer:Gossip', + 'celery.worker.consumer:Heart', + 'celery.worker.consumer:Control', + 'celery.worker.consumer:Tasks', + 'celery.worker.consumer:Evloop', + 'celery.worker.consumer:Agent', + ] + + def shutdown(self, parent): + self.send_all(parent, 'shutdown') + + def __init__(self, on_task_request, + init_callback=noop, hostname=None, + pool=None, app=None, + timer=None, controller=None, hub=None, amqheartbeat=None, + worker_options=None, disable_rate_limits=False, + initial_prefetch_count=2, prefetch_multiplier=1, **kwargs): + self.app = app + self.controller = controller + self.init_callback = init_callback + self.hostname = hostname or socket.gethostname() + self.pid = os.getpid() + self.pool = pool + self.timer = timer + self.strategies = self.Strategies() + conninfo = self.app.connection() + self.connection_errors = conninfo.connection_errors + self.channel_errors = conninfo.channel_errors + self._restart_state = restart_state(maxR=5, maxT=1) + + self._does_info = logger.isEnabledFor(logging.INFO) + self.on_task_request = on_task_request + self.on_task_message = set() + self.amqheartbeat_rate = self.app.conf.BROKER_HEARTBEAT_CHECKRATE + self.disable_rate_limits = disable_rate_limits + self.initial_prefetch_count = initial_prefetch_count + self.prefetch_multiplier = prefetch_multiplier + + # this contains a tokenbucket for each task type by name, used for + # rate limits, or None if rate limits are disabled for that task. + self.task_buckets = defaultdict(lambda: None) + self.reset_rate_limits() + + self.hub = hub + if self.hub: + self.amqheartbeat = amqheartbeat + if self.amqheartbeat is None: + self.amqheartbeat = self.app.conf.BROKER_HEARTBEAT + else: + self.amqheartbeat = 0 + + if not hasattr(self, 'loop'): + self.loop = loops.asynloop if hub else loops.synloop + + if _detect_environment() == 'gevent': + # there's a gevent bug that causes timeouts to not be reset, + # so if the connection timeout is exceeded once, it can NEVER + # connect again. + self.app.conf.BROKER_CONNECTION_TIMEOUT = None + + self.steps = [] + self.blueprint = self.Blueprint( + app=self.app, on_close=self.on_close, + ) + self.blueprint.apply(self, **dict(worker_options or {}, **kwargs)) + + def bucket_for_task(self, type): + limit = rate(getattr(type, 'rate_limit', None)) + return TokenBucket(limit, capacity=1) if limit else None + + def reset_rate_limits(self): + self.task_buckets.update( + (n, self.bucket_for_task(t)) for n, t in items(self.app.tasks) + ) + + def _update_prefetch_count(self, index=0): + """Update prefetch count after pool/shrink grow operations. + + Index must be the change in number of processes as a positive + (increasing) or negative (decreasing) number. + + .. note:: + + Currently pool grow operations will end up with an offset + of +1 if the initial size of the pool was 0 (e.g. + ``--autoscale=1,0``). 
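+
+            For example (illustrative): with ``prefetch_multiplier=4`` and a
+            pool that has grown to 2 processes, the new prefetch count set
+            below is ``2 * 4 = 8``.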
+ + """ + num_processes = self.pool.num_processes + if not self.initial_prefetch_count or not num_processes: + return # prefetch disabled + self.initial_prefetch_count = ( + self.pool.num_processes * self.prefetch_multiplier + ) + return self._update_qos_eventually(index) + + def _update_qos_eventually(self, index): + return (self.qos.decrement_eventually if index < 0 + else self.qos.increment_eventually)( + abs(index) * self.prefetch_multiplier) + + def _limit_task(self, request, bucket, tokens): + if not bucket.can_consume(tokens): + hold = bucket.expected_time(tokens) + self.timer.call_after( + hold, self._limit_task, (request, bucket, tokens), + ) + else: + task_reserved(request) + self.on_task_request(request) + + def start(self): + blueprint = self.blueprint + while blueprint.state != CLOSE: + self.restart_count += 1 + maybe_shutdown() + try: + blueprint.start(self) + except self.connection_errors as exc: + if isinstance(exc, OSError) and get_errno(exc) == errno.EMFILE: + raise # Too many open files + maybe_shutdown() + try: + self._restart_state.step() + except RestartFreqExceeded as exc: + crit('Frequent restarts detected: %r', exc, exc_info=1) + sleep(1) + if blueprint.state != CLOSE and self.connection: + warn(CONNECTION_RETRY, exc_info=True) + try: + self.connection.collect() + except Exception: + pass + self.on_close() + blueprint.restart(self) + + def register_with_event_loop(self, hub): + self.blueprint.send_all( + self, 'register_with_event_loop', args=(hub, ), + description='Hub.register', + ) + + def shutdown(self): + self.in_shutdown = True + self.blueprint.shutdown(self) + + def stop(self): + self.blueprint.stop(self) + + def on_ready(self): + callback, self.init_callback = self.init_callback, None + if callback: + callback(self) + + def loop_args(self): + return (self, self.connection, self.task_consumer, + self.blueprint, self.hub, self.qos, self.amqheartbeat, + self.app.clock, self.amqheartbeat_rate) + + def on_decode_error(self, message, exc): + """Callback called if an error occurs while decoding + a message received. + + Simply logs the error and acknowledges the message so it + doesn't enter a loop. + + :param message: The message with errors. + :param exc: The original exception instance. + + """ + crit(MESSAGE_DECODE_ERROR, + exc, message.content_type, message.content_encoding, + safe_repr(message.headers), dump_body(message, message.body), + exc_info=1) + message.ack() + + def on_close(self): + # Clear internal queues to get rid of old messages. + # They can't be acked anyway, as a delivery tag is specific + # to the current channel. + if self.controller and self.controller.semaphore: + self.controller.semaphore.clear() + if self.timer: + self.timer.clear() + reserved_requests.clear() + if self.pool and self.pool.flush: + self.pool.flush() + + def connect(self): + """Establish the broker connection. + + Will retry establishing the connection if the + :setting:`BROKER_CONNECTION_RETRY` setting is enabled + + """ + conn = self.app.connection(heartbeat=self.amqheartbeat) + + # Callback called for each retry while the connection + # can't be established. + def _error_handler(exc, interval, next_step=CONNECTION_RETRY_STEP): + if getattr(conn, 'alt', None) and interval == 0: + next_step = CONNECTION_FAILOVER + error(CONNECTION_ERROR, conn.as_uri(), exc, + next_step.format(when=humanize_seconds(interval, 'in', ' '))) + + # remember that the connection is lazy, it won't establish + # until needed. 
+ if not self.app.conf.BROKER_CONNECTION_RETRY: + # retry disabled, just call connect directly. + conn.connect() + return conn + + conn = conn.ensure_connection( + _error_handler, self.app.conf.BROKER_CONNECTION_MAX_RETRIES, + callback=maybe_shutdown, + ) + if self.hub: + conn.transport.register_with_event_loop(conn.connection, self.hub) + return conn + + def add_task_queue(self, queue, exchange=None, exchange_type=None, + routing_key=None, **options): + cset = self.task_consumer + queues = self.app.amqp.queues + # Must use in' here, as __missing__ will automatically + # create queues when CELERY_CREATE_MISSING_QUEUES is enabled. + # (Issue #1079) + if queue in queues: + q = queues[queue] + else: + exchange = queue if exchange is None else exchange + exchange_type = ('direct' if exchange_type is None + else exchange_type) + q = queues.select_add(queue, + exchange=exchange, + exchange_type=exchange_type, + routing_key=routing_key, **options) + if not cset.consuming_from(queue): + cset.add_queue(q) + cset.consume() + info('Started consuming from %s', queue) + + def cancel_task_queue(self, queue): + info('Cancelling queue %s', queue) + self.app.amqp.queues.deselect(queue) + self.task_consumer.cancel_by_queue(queue) + + def apply_eta_task(self, task): + """Method called by the timer to apply a task with an + ETA/countdown.""" + task_reserved(task) + self.on_task_request(task) + self.qos.decrement_eventually() + + def _message_report(self, body, message): + return MESSAGE_REPORT.format(dump_body(message, body), + safe_repr(message.content_type), + safe_repr(message.content_encoding), + safe_repr(message.delivery_info), + safe_repr(message.headers)) + + def on_unknown_message(self, body, message): + warn(UNKNOWN_FORMAT, self._message_report(body, message)) + message.reject_log_error(logger, self.connection_errors) + + def on_unknown_task(self, body, message, exc): + error(UNKNOWN_TASK_ERROR, exc, dump_body(message, body), exc_info=True) + message.reject_log_error(logger, self.connection_errors) + + def on_invalid_task(self, body, message, exc): + error(INVALID_TASK_ERROR, exc, dump_body(message, body), exc_info=True) + message.reject_log_error(logger, self.connection_errors) + + def update_strategies(self): + loader = self.app.loader + for name, task in items(self.app.tasks): + self.strategies[name] = task.start_strategy(self.app, self) + task.__trace__ = build_tracer(name, task, loader, self.hostname, + app=self.app) + + def create_task_handler(self): + strategies = self.strategies + on_unknown_message = self.on_unknown_message + on_unknown_task = self.on_unknown_task + on_invalid_task = self.on_invalid_task + callbacks = self.on_task_message + + def on_task_received(body, message): + try: + name = body['task'] + except (KeyError, TypeError): + return on_unknown_message(body, message) + + try: + strategies[name](message, body, + message.ack_log_error, + message.reject_log_error, + callbacks) + except KeyError as exc: + on_unknown_task(body, message, exc) + except InvalidTaskError as exc: + on_invalid_task(body, message, exc) + + return on_task_received + + def __repr__(self): + return ''.format( + self=self, state=self.blueprint.human_state(), + ) + + +class Connection(bootsteps.StartStopStep): + + def __init__(self, c, **kwargs): + c.connection = None + + def start(self, c): + c.connection = c.connect() + info('Connected to %s', c.connection.as_uri()) + + def shutdown(self, c): + # We must set self.connection to None here, so + # that the green pidbox thread exits. 
+ connection, c.connection = c.connection, None + if connection: + ignore_errors(connection, connection.close) + + def info(self, c, params='N/A'): + if c.connection: + params = c.connection.info() + params.pop('password', None) # don't send password. + return {'broker': params} + + +class Events(bootsteps.StartStopStep): + requires = (Connection, ) + + def __init__(self, c, send_events=None, **kwargs): + self.send_events = True + self.groups = None if send_events else ['worker'] + c.event_dispatcher = None + + def start(self, c): + # flush events sent while connection was down. + prev = self._close(c) + dis = c.event_dispatcher = c.app.events.Dispatcher( + c.connect(), hostname=c.hostname, + enabled=self.send_events, groups=self.groups, + ) + if prev: + dis.extend_buffer(prev) + dis.flush() + + def stop(self, c): + pass + + def _close(self, c): + if c.event_dispatcher: + dispatcher = c.event_dispatcher + # remember changes from remote control commands: + self.groups = dispatcher.groups + + # close custom connection + if dispatcher.connection: + ignore_errors(c, dispatcher.connection.close) + ignore_errors(c, dispatcher.close) + c.event_dispatcher = None + return dispatcher + + def shutdown(self, c): + self._close(c) + + +class Heart(bootsteps.StartStopStep): + requires = (Events, ) + + def __init__(self, c, without_heartbeat=False, heartbeat_interval=None, + **kwargs): + self.enabled = not without_heartbeat + self.heartbeat_interval = heartbeat_interval + c.heart = None + + def start(self, c): + c.heart = heartbeat.Heart( + c.timer, c.event_dispatcher, self.heartbeat_interval, + ) + c.heart.start() + + def stop(self, c): + c.heart = c.heart and c.heart.stop() + shutdown = stop + + +class Mingle(bootsteps.StartStopStep): + label = 'Mingle' + requires = (Events, ) + compatible_transports = set(['amqp', 'redis']) + + def __init__(self, c, without_mingle=False, **kwargs): + self.enabled = not without_mingle and self.compatible_transport(c.app) + + def compatible_transport(self, app): + with app.connection() as conn: + return conn.transport.driver_type in self.compatible_transports + + def start(self, c): + info('mingle: searching for neighbors') + I = c.app.control.inspect(timeout=1.0, connection=c.connection) + replies = I.hello(c.hostname, revoked._data) or {} + replies.pop(c.hostname, None) + if replies: + info('mingle: sync with %s nodes', + len([reply for reply, value in items(replies) if value])) + for reply in values(replies): + if reply: + try: + other_clock, other_revoked = MINGLE_GET_FIELDS(reply) + except KeyError: # reply from pre-3.1 worker + pass + else: + c.app.clock.adjust(other_clock) + revoked.update(other_revoked) + info('mingle: sync complete') + else: + info('mingle: all alone') + + +class Tasks(bootsteps.StartStopStep): + requires = (Mingle, ) + + def __init__(self, c, **kwargs): + c.task_consumer = c.qos = None + + def start(self, c): + c.update_strategies() + + # - RabbitMQ 3.3 completely redefines how basic_qos works.. + # This will detect if the new qos smenatics is in effect, + # and if so make sure the 'apply_global' flag is set on qos updates. 
+ qos_global = not c.connection.qos_semantics_matches_spec + + # set initial prefetch count + c.connection.default_channel.basic_qos( + 0, c.initial_prefetch_count, qos_global, + ) + + c.task_consumer = c.app.amqp.TaskConsumer( + c.connection, on_decode_error=c.on_decode_error, + ) + + def set_prefetch_count(prefetch_count): + return c.task_consumer.qos( + prefetch_count=prefetch_count, + apply_global=qos_global, + ) + c.qos = QoS(set_prefetch_count, c.initial_prefetch_count) + + def stop(self, c): + if c.task_consumer: + debug('Cancelling task consumer...') + ignore_errors(c, c.task_consumer.cancel) + + def shutdown(self, c): + if c.task_consumer: + self.stop(c) + debug('Closing consumer channel...') + ignore_errors(c, c.task_consumer.close) + c.task_consumer = None + + def info(self, c): + return {'prefetch_count': c.qos.value if c.qos else 'N/A'} + + +class Agent(bootsteps.StartStopStep): + conditional = True + requires = (Connection, ) + + def __init__(self, c, **kwargs): + self.agent_cls = self.enabled = c.app.conf.CELERYD_AGENT + + def create(self, c): + agent = c.agent = self.instantiate(self.agent_cls, c.connection) + return agent + + +class Control(bootsteps.StartStopStep): + requires = (Tasks, ) + + def __init__(self, c, **kwargs): + self.is_green = c.pool is not None and c.pool.is_green + self.box = (pidbox.gPidbox if self.is_green else pidbox.Pidbox)(c) + self.start = self.box.start + self.stop = self.box.stop + self.shutdown = self.box.shutdown + + def include_if(self, c): + return c.app.conf.CELERY_ENABLE_REMOTE_CONTROL + + +class Gossip(bootsteps.ConsumerStep): + label = 'Gossip' + requires = (Mingle, ) + _cons_stamp_fields = itemgetter( + 'id', 'clock', 'hostname', 'pid', 'topic', 'action', 'cver', + ) + compatible_transports = set(['amqp', 'redis']) + + def __init__(self, c, without_gossip=False, interval=5.0, **kwargs): + self.enabled = not without_gossip and self.compatible_transport(c.app) + self.app = c.app + c.gossip = self + self.Receiver = c.app.events.Receiver + self.hostname = c.hostname + self.full_hostname = '.'.join([self.hostname, str(c.pid)]) + + self.timer = c.timer + if self.enabled: + self.state = c.app.events.State( + on_node_join=self.on_node_join, + on_node_leave=self.on_node_leave, + max_tasks_in_memory=1, + ) + if c.hub: + c._mutex = DummyLock() + self.update_state = self.state.event + self.interval = interval + self._tref = None + self.consensus_requests = defaultdict(list) + self.consensus_replies = {} + self.event_handlers = { + 'worker.elect': self.on_elect, + 'worker.elect.ack': self.on_elect_ack, + } + self.clock = c.app.clock + + self.election_handlers = { + 'task': self.call_task + } + + def compatible_transport(self, app): + with app.connection() as conn: + return conn.transport.driver_type in self.compatible_transports + + def election(self, id, topic, action=None): + self.consensus_replies[id] = [] + self.dispatcher.send( + 'worker-elect', + id=id, topic=topic, action=action, cver=1, + ) + + def call_task(self, task): + try: + signature(task, app=self.app).apply_async() + except Exception as exc: + error('Could not call task: %r', exc, exc_info=1) + + def on_elect(self, event): + try: + (id_, clock, hostname, pid, + topic, action, _) = self._cons_stamp_fields(event) + except KeyError as exc: + return error('election request missing field %s', exc, exc_info=1) + heappush( + self.consensus_requests[id_], + (clock, '%s.%s' % (hostname, pid), topic, action), + ) + self.dispatcher.send('worker-elect-ack', id=id_) + + def start(self, c): + 
super(Gossip, self).start(c) + self.dispatcher = c.event_dispatcher + + def on_elect_ack(self, event): + id = event['id'] + try: + replies = self.consensus_replies[id] + except KeyError: + return # not for us + alive_workers = self.state.alive_workers() + replies.append(event['hostname']) + + if len(replies) >= len(alive_workers): + _, leader, topic, action = self.clock.sort_heap( + self.consensus_requests[id], + ) + if leader == self.full_hostname: + info('I won the election %r', id) + try: + handler = self.election_handlers[topic] + except KeyError: + error('Unknown election topic %r', topic, exc_info=1) + else: + handler(action) + else: + info('node %s elected for %r', leader, id) + self.consensus_requests.pop(id, None) + self.consensus_replies.pop(id, None) + + def on_node_join(self, worker): + debug('%s joined the party', worker.hostname) + + def on_node_leave(self, worker): + debug('%s left', worker.hostname) + + def on_node_lost(self, worker): + info('missed heartbeat from %s', worker.hostname) + + def register_timer(self): + if self._tref is not None: + self._tref.cancel() + self._tref = self.timer.call_repeatedly(self.interval, self.periodic) + + def periodic(self): + workers = self.state.workers + dirty = set() + for worker in values(workers): + if not worker.alive: + dirty.add(worker) + self.on_node_lost(worker) + for worker in dirty: + workers.pop(worker.hostname, None) + + def get_consumers(self, channel): + self.register_timer() + ev = self.Receiver(channel, routing_key='worker.#') + return [kombu.Consumer( + channel, + queues=[ev.queue], + on_message=partial(self.on_message, ev.event_from_message), + no_ack=True + )] + + def on_message(self, prepare, message): + _type = message.delivery_info['routing_key'] + + # For redis when `fanout_patterns=False` (See Issue #1882) + if _type.split('.', 1)[0] == 'task': + return + try: + handler = self.event_handlers[_type] + except KeyError: + pass + else: + return handler(message.payload) + + hostname = (message.headers.get('hostname') or + message.payload['hostname']) + if hostname != self.hostname: + type, event = prepare(message.payload) + self.update_state(event) + else: + self.clock.forward() + + +class Evloop(bootsteps.StartStopStep): + label = 'event loop' + last = True + + def start(self, c): + self.patch_all(c) + c.loop(*c.loop_args()) + + def patch_all(self, c): + c.qos._mutex = DummyLock() diff --git a/celery/worker/control.py b/celery/worker/control.py new file mode 100644 index 0000000..6a02f6d --- /dev/null +++ b/celery/worker/control.py @@ -0,0 +1,385 @@ +# -*- coding: utf-8 -*- +""" + celery.worker.control + ~~~~~~~~~~~~~~~~~~~~~ + + Remote control commands. + +""" +from __future__ import absolute_import + +import io +import tempfile + +from kombu.utils.encoding import safe_repr + +from celery.exceptions import WorkerShutdown +from celery.five import UserDict, items, string_t +from celery.platforms import signals as _signals +from celery.utils import timeutils +from celery.utils.functional import maybe_list +from celery.utils.log import get_logger +from celery.utils import jsonify + +from . import state as worker_state +from .state import revoked +from .job import Request + +__all__ = ['Panel'] +DEFAULT_TASK_INFO_ITEMS = ('exchange', 'routing_key', 'rate_limit') +logger = get_logger(__name__) + + +class Panel(UserDict): + data = dict() # Global registry. 
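
``Panel`` above keeps a module-global dict (``Panel.data``) that maps command
names to handler functions; the ``register`` classmethod that follows simply
stores each decorated function under its name, so the worker's pidbox node can
dispatch incoming control messages by key. A minimal, self-contained sketch of
that registry idea (the ``Registry`` class and ``ping`` command here are
hypothetical, not taken from the diff)::

    class Registry(dict):
        """Map command names to handler functions."""

        def register(self, fun, name=None):
            self[name or fun.__name__] = fun
            return fun

    commands = Registry()

    @commands.register
    def ping(state, **kwargs):
        return {'ok': 'pong'}

    # Dispatch by name, much as the pidbox node does with Panel.data:
    assert commands['ping'](state=None) == {'ok': 'pong'}
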
+ + @classmethod + def register(cls, method, name=None): + cls.data[name or method.__name__] = method + return method + + +def _find_requests_by_id(ids, requests): + found, total = 0, len(ids) + for request in requests: + if request.id in ids: + yield request + found += 1 + if found >= total: + break + + +@Panel.register +def query_task(state, ids, **kwargs): + ids = maybe_list(ids) + + def reqinfo(state, req): + return state, req.info() + + reqs = dict((req.id, ('reserved', req.info())) + for req in _find_requests_by_id( + ids, worker_state.reserved_requests)) + reqs.update(dict( + (req.id, ('active', req.info())) + for req in _find_requests_by_id( + ids, worker_state.active_requests, + ) + )) + + return reqs + + +@Panel.register +def revoke(state, task_id, terminate=False, signal=None, **kwargs): + """Revoke task by task id.""" + # supports list argument since 3.1 + task_ids, task_id = set(maybe_list(task_id) or []), None + size = len(task_ids) + terminated = set() + + revoked.update(task_ids) + if terminate: + signum = _signals.signum(signal or 'TERM') + # reserved_requests changes size during iteration + # so need to consume the items first, then terminate after. + requests = set(_find_requests_by_id( + task_ids, + worker_state.reserved_requests, + )) + for request in requests: + if request.id not in terminated: + terminated.add(request.id) + logger.info('Terminating %s (%s)', request.id, signum) + request.terminate(state.consumer.pool, signal=signum) + if len(terminated) >= size: + break + + if not terminated: + return {'ok': 'terminate: tasks unknown'} + return {'ok': 'terminate: {0}'.format(', '.join(terminated))} + + idstr = ', '.join(task_ids) + logger.info('Tasks flagged as revoked: %s', idstr) + return {'ok': 'tasks {0} flagged as revoked'.format(idstr)} + + +@Panel.register +def report(state): + return {'ok': state.app.bugreport()} + + +@Panel.register +def enable_events(state): + dispatcher = state.consumer.event_dispatcher + if 'task' not in dispatcher.groups: + dispatcher.groups.add('task') + logger.info('Events of group {task} enabled by remote.') + return {'ok': 'task events enabled'} + return {'ok': 'task events already enabled'} + + +@Panel.register +def disable_events(state): + dispatcher = state.consumer.event_dispatcher + if 'task' in dispatcher.groups: + dispatcher.groups.discard('task') + logger.info('Events of group {task} disabled by remote.') + return {'ok': 'task events disabled'} + return {'ok': 'task events already disabled'} + + +@Panel.register +def heartbeat(state): + logger.debug('Heartbeat requested by remote.') + dispatcher = state.consumer.event_dispatcher + dispatcher.send('worker-heartbeat', freq=5, **worker_state.SOFTWARE_INFO) + + +@Panel.register +def rate_limit(state, task_name, rate_limit, **kwargs): + """Set new rate limit for a task type. + + See :attr:`celery.task.base.Task.rate_limit`. + + :param task_name: Type of task. + :param rate_limit: New rate limit. 
+ + """ + + try: + timeutils.rate(rate_limit) + except ValueError as exc: + return {'error': 'Invalid rate limit string: {0!r}'.format(exc)} + + try: + state.app.tasks[task_name].rate_limit = rate_limit + except KeyError: + logger.error('Rate limit attempt for unknown task %s', + task_name, exc_info=True) + return {'error': 'unknown task'} + + state.consumer.reset_rate_limits() + + if not rate_limit: + logger.info('Rate limits disabled for tasks of type %s', task_name) + return {'ok': 'rate limit disabled successfully'} + + logger.info('New rate limit for tasks of type %s: %s.', + task_name, rate_limit) + return {'ok': 'new rate limit set successfully'} + + +@Panel.register +def time_limit(state, task_name=None, hard=None, soft=None, **kwargs): + try: + task = state.app.tasks[task_name] + except KeyError: + logger.error('Change time limit attempt for unknown task %s', + task_name, exc_info=True) + return {'error': 'unknown task'} + + task.soft_time_limit = soft + task.time_limit = hard + + logger.info('New time limits for tasks of type %s: soft=%s hard=%s', + task_name, soft, hard) + return {'ok': 'time limits set successfully'} + + +@Panel.register +def dump_schedule(state, safe=False, **kwargs): + + def prepare_entries(): + for waiting in state.consumer.timer.schedule.queue: + try: + arg0 = waiting.entry.args[0] + except (IndexError, TypeError): + continue + else: + if isinstance(arg0, Request): + yield {'eta': arg0.eta.isoformat() if arg0.eta else None, + 'priority': waiting.priority, + 'request': arg0.info(safe=safe)} + return list(prepare_entries()) + + +@Panel.register +def dump_reserved(state, safe=False, **kwargs): + reserved = worker_state.reserved_requests - worker_state.active_requests + if not reserved: + return [] + return [request.info(safe=safe) for request in reserved] + + +@Panel.register +def dump_active(state, safe=False, **kwargs): + return [request.info(safe=safe) + for request in worker_state.active_requests] + + +@Panel.register +def stats(state, **kwargs): + return state.consumer.controller.stats() + + +@Panel.register +def objgraph(state, num=200, max_depth=10, type='Request'): # pragma: no cover + try: + import objgraph + except ImportError: + raise ImportError('Requires the objgraph library') + print('Dumping graph for type %r' % (type, )) + with tempfile.NamedTemporaryFile(prefix='cobjg', + suffix='.png', delete=False) as fh: + objects = objgraph.by_type(type)[:num] + objgraph.show_backrefs( + objects, + max_depth=max_depth, highlight=lambda v: v in objects, + filename=fh.name, + ) + return {'filename': fh.name} + + +@Panel.register +def memsample(state, **kwargs): # pragma: no cover + from celery.utils.debug import sample_mem + return sample_mem() + + +@Panel.register +def memdump(state, samples=10, **kwargs): # pragma: no cover + from celery.utils.debug import memdump + out = io.StringIO() + memdump(file=out) + return out.getvalue() + + +@Panel.register +def clock(state, **kwargs): + return {'clock': state.app.clock.value} + + +@Panel.register +def dump_revoked(state, **kwargs): + return list(worker_state.revoked) + + +@Panel.register +def hello(state, from_node, revoked=None, **kwargs): + if from_node != state.hostname: + logger.info('sync with %s', from_node) + if revoked: + worker_state.revoked.update(revoked) + return {'revoked': worker_state.revoked._data, + 'clock': state.app.clock.forward()} + + +@Panel.register +def dump_tasks(state, taskinfoitems=None, builtins=False, **kwargs): + reg = state.app.tasks + taskinfoitems = taskinfoitems or 
DEFAULT_TASK_INFO_ITEMS + + tasks = reg if builtins else ( + task for task in reg if not task.startswith('celery.')) + + def _extract_info(task): + fields = dict((field, str(getattr(task, field, None))) + for field in taskinfoitems + if getattr(task, field, None) is not None) + if fields: + info = ['='.join(f) for f in items(fields)] + return '{0} [{1}]'.format(task.name, ' '.join(info)) + return task.name + + return [_extract_info(reg[task]) for task in sorted(tasks)] + + +@Panel.register +def ping(state, **kwargs): + return {'ok': 'pong'} + + +@Panel.register +def pool_grow(state, n=1, **kwargs): + if state.consumer.controller.autoscaler: + state.consumer.controller.autoscaler.force_scale_up(n) + else: + state.consumer.pool.grow(n) + state.consumer._update_prefetch_count(n) + return {'ok': 'pool will grow'} + + +@Panel.register +def pool_shrink(state, n=1, **kwargs): + if state.consumer.controller.autoscaler: + state.consumer.controller.autoscaler.force_scale_down(n) + else: + state.consumer.pool.shrink(n) + state.consumer._update_prefetch_count(-n) + return {'ok': 'pool will shrink'} + + +@Panel.register +def pool_restart(state, modules=None, reload=False, reloader=None, **kwargs): + if state.app.conf.CELERYD_POOL_RESTARTS: + state.consumer.controller.reload(modules, reload, reloader=reloader) + return {'ok': 'reload started'} + else: + raise ValueError('Pool restarts not enabled') + + +@Panel.register +def autoscale(state, max=None, min=None): + autoscaler = state.consumer.controller.autoscaler + if autoscaler: + max_, min_ = autoscaler.update(max, min) + return {'ok': 'autoscale now min={0} max={1}'.format(max_, min_)} + raise ValueError('Autoscale not enabled') + + +@Panel.register +def shutdown(state, msg='Got shutdown from remote', **kwargs): + logger.warning(msg) + raise WorkerShutdown(msg) + + +@Panel.register +def add_consumer(state, queue, exchange=None, exchange_type=None, + routing_key=None, **options): + state.consumer.add_task_queue(queue, exchange, exchange_type, + routing_key, **options) + return {'ok': 'add consumer {0}'.format(queue)} + + +@Panel.register +def cancel_consumer(state, queue=None, **_): + state.consumer.cancel_task_queue(queue) + return {'ok': 'no longer consuming from {0}'.format(queue)} + + +@Panel.register +def active_queues(state): + """Return information about the queues a worker consumes from.""" + if state.consumer.task_consumer: + return [dict(queue.as_dict(recurse=True)) + for queue in state.consumer.task_consumer.queues] + return [] + + +def _wanted_config_key(key): + return (isinstance(key, string_t) and + key.isupper() and + not key.startswith('__')) + + +@Panel.register +def dump_conf(state, with_defaults=False, **kwargs): + return jsonify(state.app.conf.table(with_defaults=with_defaults), + keyfilter=_wanted_config_key, + unknown_type_filter=safe_repr) + + +@Panel.register +def election(state, id, topic, action=None, **kwargs): + if state.consumer.gossip: + state.consumer.gossip.election(id, topic, action) diff --git a/celery/worker/heartbeat.py b/celery/worker/heartbeat.py new file mode 100644 index 0000000..cf46ab0 --- /dev/null +++ b/celery/worker/heartbeat.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +""" + celery.worker.heartbeat + ~~~~~~~~~~~~~~~~~~~~~~~ + + This is the internal thread that sends heartbeat events + at regular intervals. 
+ +""" +from __future__ import absolute_import + +from celery.utils.sysinfo import load_average + +from .state import SOFTWARE_INFO, active_requests, all_total_count + +__all__ = ['Heart'] + + +class Heart(object): + """Timer sending heartbeats at regular intervals. + + :param timer: Timer instance. + :param eventer: Event dispatcher used to send the event. + :keyword interval: Time in seconds between heartbeats. + Default is 2 seconds. + + """ + + def __init__(self, timer, eventer, interval=None): + self.timer = timer + self.eventer = eventer + self.interval = float(interval or 2.0) + self.tref = None + + # Make event dispatcher start/stop us when enabled/disabled. + self.eventer.on_enabled.add(self.start) + self.eventer.on_disabled.add(self.stop) + + def _send(self, event): + return self.eventer.send(event, freq=self.interval, + active=len(active_requests), + processed=all_total_count[0], + loadavg=load_average(), + **SOFTWARE_INFO) + + def start(self): + if self.eventer.enabled: + self._send('worker-online') + self.tref = self.timer.call_repeatedly( + self.interval, self._send, ('worker-heartbeat', ), + ) + + def stop(self): + if self.tref is not None: + self.timer.cancel(self.tref) + self.tref = None + if self.eventer.enabled: + self._send('worker-offline') diff --git a/celery/worker/job.py b/celery/worker/job.py new file mode 100644 index 0000000..3acbcb5 --- /dev/null +++ b/celery/worker/job.py @@ -0,0 +1,592 @@ +# -*- coding: utf-8 -*- +""" + celery.worker.job + ~~~~~~~~~~~~~~~~~ + + This module defines the :class:`Request` class, + which specifies how tasks are executed. + +""" +from __future__ import absolute_import, unicode_literals + +import logging +import socket +import sys + +from billiard.einfo import ExceptionInfo +from datetime import datetime +from weakref import ref + +from kombu.utils import kwdict, reprcall +from kombu.utils.encoding import safe_repr, safe_str + +from celery import signals +from celery.app.trace import trace_task, trace_task_ret +from celery.exceptions import ( + Ignore, TaskRevokedError, InvalidTaskError, + SoftTimeLimitExceeded, TimeLimitExceeded, + WorkerLostError, Terminated, Retry, Reject, +) +from celery.five import items, monotonic, string, string_t +from celery.platforms import signals as _signals +from celery.utils import fun_takes_kwargs +from celery.utils.functional import noop +from celery.utils.log import get_logger +from celery.utils.serialization import get_pickled_exception +from celery.utils.text import truncate +from celery.utils.timeutils import maybe_iso8601, timezone, maybe_make_aware + +from . import state + +__all__ = ['Request'] + +IS_PYPY = hasattr(sys, 'pypy_version_info') + +logger = get_logger(__name__) +debug, info, warn, error = (logger.debug, logger.info, + logger.warning, logger.error) +_does_info = False +_does_debug = False + +#: Max length of result representation +RESULT_MAXLEN = 128 + + +def __optimize__(): + # this is also called by celery.app.trace.setup_worker_optimizations + global _does_debug + global _does_info + _does_debug = logger.isEnabledFor(logging.DEBUG) + _does_info = logger.isEnabledFor(logging.INFO) +__optimize__() + +# Localize +tz_utc = timezone.utc +tz_or_local = timezone.tz_or_local +send_revoked = signals.task_revoked.send + +task_accepted = state.task_accepted +task_ready = state.task_ready +revoked_tasks = state.revoked + +NEEDS_KWDICT = sys.version_info <= (2, 6) + +#: Use when no message object passed to :class:`Request`. 
+DEFAULT_FIELDS = { + 'headers': None, + 'reply_to': None, + 'correlation_id': None, + 'delivery_info': { + 'exchange': None, + 'routing_key': None, + 'priority': 0, + 'redelivered': False, + }, +} + + +class Request(object): + """A request for task execution.""" + if not IS_PYPY: # pragma: no cover + __slots__ = ( + 'app', 'name', 'id', 'args', 'kwargs', 'on_ack', + 'hostname', 'eventer', 'connection_errors', 'task', 'eta', + 'expires', 'request_dict', 'acknowledged', 'on_reject', + 'utc', 'time_start', 'worker_pid', '_already_revoked', + '_terminate_on_ack', '_apply_result', + '_tzlocal', '__weakref__', '__dict__', + ) + + #: Format string used to log task success. + success_msg = """\ + Task %(name)s[%(id)s] succeeded in %(runtime)ss: %(return_value)s + """ + + #: Format string used to log task failure. + error_msg = """\ + Task %(name)s[%(id)s] %(description)s: %(exc)s + """ + + #: Format string used to log internal error. + internal_error_msg = """\ + Task %(name)s[%(id)s] %(description)s: %(exc)s + """ + + ignored_msg = """\ + Task %(name)s[%(id)s] %(description)s + """ + + rejected_msg = """\ + Task %(name)s[%(id)s] %(exc)s + """ + + #: Format string used to log task retry. + retry_msg = """Task %(name)s[%(id)s] retry: %(exc)s""" + + def __init__(self, body, on_ack=noop, + hostname=None, eventer=None, app=None, + connection_errors=None, request_dict=None, + message=None, task=None, on_reject=noop, **opts): + self.app = app + name = self.name = body['task'] + self.id = body['id'] + self.args = body.get('args', []) + self.kwargs = body.get('kwargs', {}) + try: + self.kwargs.items + except AttributeError: + raise InvalidTaskError( + 'Task keyword arguments is not a mapping') + if NEEDS_KWDICT: + self.kwargs = kwdict(self.kwargs) + eta = body.get('eta') + expires = body.get('expires') + utc = self.utc = body.get('utc', False) + self.on_ack = on_ack + self.on_reject = on_reject + self.hostname = hostname or socket.gethostname() + self.eventer = eventer + self.connection_errors = connection_errors or () + self.task = task or self.app.tasks[name] + self.acknowledged = self._already_revoked = False + self.time_start = self.worker_pid = self._terminate_on_ack = None + self._apply_result = None + self._tzlocal = None + + # timezone means the message is timezone-aware, and the only timezone + # supported at this point is UTC. 
+ if eta is not None: + try: + self.eta = maybe_iso8601(eta) + except (AttributeError, ValueError, TypeError) as exc: + raise InvalidTaskError( + 'invalid eta value {0!r}: {1}'.format(eta, exc)) + if utc: + self.eta = maybe_make_aware(self.eta, self.tzlocal) + else: + self.eta = None + if expires is not None: + try: + self.expires = maybe_iso8601(expires) + except (AttributeError, ValueError, TypeError) as exc: + raise InvalidTaskError( + 'invalid expires value {0!r}: {1}'.format(expires, exc)) + if utc: + self.expires = maybe_make_aware(self.expires, self.tzlocal) + else: + self.expires = None + + if message: + delivery_info = message.delivery_info or {} + properties = message.properties or {} + body.update({ + 'headers': message.headers, + 'reply_to': properties.get('reply_to'), + 'correlation_id': properties.get('correlation_id'), + 'delivery_info': { + 'exchange': delivery_info.get('exchange'), + 'routing_key': delivery_info.get('routing_key'), + 'priority': delivery_info.get('priority'), + 'redelivered': delivery_info.get('redelivered'), + } + + }) + else: + body.update(DEFAULT_FIELDS) + self.request_dict = body + + @property + def delivery_info(self): + return self.request_dict['delivery_info'] + + def extend_with_default_kwargs(self): + """Extend the tasks keyword arguments with standard task arguments. + + Currently these are `logfile`, `loglevel`, `task_id`, + `task_name`, `task_retries`, and `delivery_info`. + + See :meth:`celery.task.base.Task.run` for more information. + + Magic keyword arguments are deprecated and will be removed + in version 4.0. + + """ + kwargs = dict(self.kwargs) + default_kwargs = {'logfile': None, # deprecated + 'loglevel': None, # deprecated + 'task_id': self.id, + 'task_name': self.name, + 'task_retries': self.request_dict.get('retries', 0), + 'task_is_eager': False, + 'delivery_info': self.delivery_info} + fun = self.task.run + supported_keys = fun_takes_kwargs(fun, default_kwargs) + extend_with = dict((key, val) for key, val in items(default_kwargs) + if key in supported_keys) + kwargs.update(extend_with) + return kwargs + + def execute_using_pool(self, pool, **kwargs): + """Used by the worker to send this task to the pool. + + :param pool: A :class:`celery.concurrency.base.TaskPool` instance. + + :raises celery.exceptions.TaskRevokedError: if the task was revoked + and ignored. + + """ + uuid = self.id + task = self.task + if self.revoked(): + raise TaskRevokedError(uuid) + + hostname = self.hostname + kwargs = self.kwargs + if task.accept_magic_kwargs: + kwargs = self.extend_with_default_kwargs() + request = self.request_dict + request.update({'hostname': hostname, 'is_eager': False, + 'delivery_info': self.delivery_info, + 'group': self.request_dict.get('taskset')}) + timeout, soft_timeout = request.get('timelimit', (None, None)) + timeout = timeout or task.time_limit + soft_timeout = soft_timeout or task.soft_time_limit + result = pool.apply_async( + trace_task_ret, + args=(self.name, uuid, self.args, kwargs, request), + accept_callback=self.on_accepted, + timeout_callback=self.on_timeout, + callback=self.on_success, + error_callback=self.on_failure, + soft_timeout=soft_timeout, + timeout=timeout, + correlation_id=uuid, + ) + # cannot create weakref to None + self._apply_result = ref(result) if result is not None else result + return result + + def execute(self, loglevel=None, logfile=None): + """Execute the task in a :func:`~celery.app.trace.trace_task`. + + :keyword loglevel: The loglevel used by the task. 
+ :keyword logfile: The logfile used by the task. + + """ + if self.revoked(): + return + + # acknowledge task as being processed. + if not self.task.acks_late: + self.acknowledge() + + kwargs = self.kwargs + if self.task.accept_magic_kwargs: + kwargs = self.extend_with_default_kwargs() + request = self.request_dict + request.update({'loglevel': loglevel, 'logfile': logfile, + 'hostname': self.hostname, 'is_eager': False, + 'delivery_info': self.delivery_info}) + retval = trace_task(self.task, self.id, self.args, kwargs, request, + hostname=self.hostname, loader=self.app.loader, + app=self.app) + self.acknowledge() + return retval + + def maybe_expire(self): + """If expired, mark the task as revoked.""" + if self.expires: + now = datetime.now(tz_or_local(self.tzlocal) if self.utc else None) + if now > self.expires: + revoked_tasks.add(self.id) + return True + + def terminate(self, pool, signal=None): + signal = _signals.signum(signal or 'TERM') + if self.time_start: + pool.terminate_job(self.worker_pid, signal) + self._announce_revoked('terminated', True, signal, False) + else: + self._terminate_on_ack = pool, signal + if self._apply_result is not None: + obj = self._apply_result() # is a weakref + if obj is not None: + obj.terminate(signal) + + def _announce_revoked(self, reason, terminated, signum, expired): + task_ready(self) + self.send_event('task-revoked', + terminated=terminated, signum=signum, expired=expired) + if self.store_errors: + self.task.backend.mark_as_revoked(self.id, reason, request=self) + self.acknowledge() + self._already_revoked = True + send_revoked(self.task, request=self, + terminated=terminated, signum=signum, expired=expired) + + def revoked(self): + """If revoked, skip task and mark state.""" + expired = False + if self._already_revoked: + return True + if self.expires: + expired = self.maybe_expire() + if self.id in revoked_tasks: + info('Discarding revoked task: %s[%s]', self.name, self.id) + self._announce_revoked( + 'expired' if expired else 'revoked', False, None, expired, + ) + return True + return False + + def send_event(self, type, **fields): + if self.eventer and self.eventer.enabled: + self.eventer.send(type, uuid=self.id, **fields) + + def on_accepted(self, pid, time_accepted): + """Handler called when task is accepted by worker pool.""" + self.worker_pid = pid + self.time_start = time_accepted + task_accepted(self) + if not self.task.acks_late: + self.acknowledge() + self.send_event('task-started') + if _does_debug: + debug('Task accepted: %s[%s] pid:%r', self.name, self.id, pid) + if self._terminate_on_ack is not None: + self.terminate(*self._terminate_on_ack) + + def on_timeout(self, soft, timeout): + """Handler called if the task times out.""" + task_ready(self) + if soft: + warn('Soft time limit (%ss) exceeded for %s[%s]', + timeout, self.name, self.id) + exc = SoftTimeLimitExceeded(timeout) + else: + error('Hard time limit (%ss) exceeded for %s[%s]', + timeout, self.name, self.id) + exc = TimeLimitExceeded(timeout) + + if self.store_errors: + self.task.backend.mark_as_failure(self.id, exc, request=self) + + if self.task.acks_late: + self.acknowledge() + + def on_success(self, ret_value, now=None, nowfun=monotonic): + """Handler called if the task was successfully processed.""" + if isinstance(ret_value, ExceptionInfo): + if isinstance(ret_value.exception, ( + SystemExit, KeyboardInterrupt)): + raise ret_value.exception + return self.on_failure(ret_value) + task_ready(self) + + if self.task.acks_late: + self.acknowledge() + + if self.eventer 
and self.eventer.enabled: + now = nowfun() + runtime = self.time_start and (now - self.time_start) or 0 + self.send_event('task-succeeded', + result=safe_repr(ret_value), runtime=runtime) + + if _does_info: + now = now or nowfun() + runtime = self.time_start and (now - self.time_start) or 0 + info(self.success_msg.strip(), { + 'id': self.id, 'name': self.name, + 'return_value': self.repr_result(ret_value), + 'runtime': runtime}) + + def on_retry(self, exc_info): + """Handler called if the task should be retried.""" + if self.task.acks_late: + self.acknowledge() + + self.send_event('task-retried', + exception=safe_repr(exc_info.exception.exc), + traceback=safe_str(exc_info.traceback)) + + if _does_info: + info(self.retry_msg.strip(), + {'id': self.id, 'name': self.name, + 'exc': exc_info.exception}) + + def on_failure(self, exc_info): + """Handler called if the task raised an exception.""" + task_ready(self) + send_failed_event = True + + if not exc_info.internal: + exc = exc_info.exception + + if isinstance(exc, Retry): + return self.on_retry(exc_info) + + # These are special cases where the process would not have had + # time to write the result. + if self.store_errors: + if isinstance(exc, WorkerLostError): + self.task.backend.mark_as_failure( + self.id, exc, request=self, + ) + elif isinstance(exc, Terminated): + self._announce_revoked( + 'terminated', True, string(exc), False) + send_failed_event = False # already sent revoked event + # (acks_late) acknowledge after result stored. + if self.task.acks_late: + self.acknowledge() + self._log_error(exc_info, send_failed_event=send_failed_event) + + def _log_error(self, einfo, send_failed_event=True): + einfo.exception = get_pickled_exception(einfo.exception) + eobj = einfo.exception + exception, traceback, exc_info, internal, sargs, skwargs = ( + safe_repr(eobj), + safe_str(einfo.traceback), + einfo.exc_info, + einfo.internal, + safe_repr(self.args), + safe_repr(self.kwargs), + ) + task = self.task + if task.throws and isinstance(eobj, task.throws): + do_send_mail, severity, exc_info, description = ( + False, logging.INFO, None, 'raised expected', + ) + else: + do_send_mail, severity, description = ( + True, logging.ERROR, 'raised unexpected', + ) + format = self.error_msg + if send_failed_event: + self.send_event( + 'task-failed', exception=exception, traceback=traceback, + ) + + if internal: + if isinstance(einfo.exception, MemoryError): + raise MemoryError('Process got: %s' % (einfo.exception, )) + elif isinstance(einfo.exception, Reject): + format = self.rejected_msg + description = 'rejected' + severity = logging.WARN + exc_info = einfo + self.reject(requeue=einfo.exception.requeue) + elif isinstance(einfo.exception, Ignore): + format = self.ignored_msg + description = 'ignored' + severity = logging.INFO + exc_info = None + self.acknowledge() + else: + format = self.internal_error_msg + description = 'INTERNAL ERROR' + severity = logging.CRITICAL + + context = { + 'hostname': self.hostname, + 'id': self.id, + 'name': self.name, + 'exc': exception, + 'traceback': traceback, + 'args': sargs, + 'kwargs': skwargs, + 'description': description, + } + + logger.log(severity, format.strip(), context, + exc_info=exc_info, + extra={'data': {'id': self.id, + 'name': self.name, + 'args': sargs, + 'kwargs': skwargs, + 'hostname': self.hostname, + 'internal': internal}}) + + if do_send_mail: + task.send_error_email(context, einfo.exception) + + def acknowledge(self): + """Acknowledge task.""" + if not self.acknowledged: + self.on_ack(logger, 
self.connection_errors) + self.acknowledged = True + + def reject(self, requeue=False): + if not self.acknowledged: + self.on_reject(logger, self.connection_errors, requeue) + self.acknowledged = True + + def repr_result(self, result, maxlen=RESULT_MAXLEN): + # 46 is the length needed to fit + # 'the quick brown fox jumps over the lazy dog' :) + if not isinstance(result, string_t): + result = safe_repr(result) + return truncate(result) if len(result) > maxlen else result + + def info(self, safe=False): + return {'id': self.id, + 'name': self.name, + 'args': self.args if safe else safe_repr(self.args), + 'kwargs': self.kwargs if safe else safe_repr(self.kwargs), + 'hostname': self.hostname, + 'time_start': self.time_start, + 'acknowledged': self.acknowledged, + 'delivery_info': self.delivery_info, + 'worker_pid': self.worker_pid} + + def __str__(self): + return '{0.name}[{0.id}]{1}{2}'.format( + self, + ' eta:[{0}]'.format(self.eta) if self.eta else '', + ' expires:[{0}]'.format(self.expires) if self.expires else '', + ) + shortinfo = __str__ + + def __repr__(self): + return '<{0} {1}: {2}>'.format( + type(self).__name__, self.id, + reprcall(self.name, self.args, self.kwargs)) + + @property + def tzlocal(self): + if self._tzlocal is None: + self._tzlocal = self.app.conf.CELERY_TIMEZONE + return self._tzlocal + + @property + def store_errors(self): + return (not self.task.ignore_result + or self.task.store_errors_even_if_ignored) + + @property + def task_id(self): + # XXX compat + return self.id + + @task_id.setter # noqa + def task_id(self, value): + self.id = value + + @property + def task_name(self): + # XXX compat + return self.name + + @task_name.setter # noqa + def task_name(self, value): + self.name = value + + @property + def reply_to(self): + # used by rpc backend when failures reported by parent process + return self.request_dict['reply_to'] + + @property + def correlation_id(self): + # used similarly to reply_to + return self.request_dict['correlation_id'] diff --git a/celery/worker/loops.py b/celery/worker/loops.py new file mode 100644 index 0000000..08bba31 --- /dev/null +++ b/celery/worker/loops.py @@ -0,0 +1,102 @@ +""" +celery.worker.loop +~~~~~~~~~~~~~~~~~~ + +The consumers highly-optimized inner loop. + +""" +from __future__ import absolute_import + +import socket + +from celery.bootsteps import RUN +from celery.exceptions import WorkerShutdown, WorkerTerminate, WorkerLostError +from celery.utils.log import get_logger + +from . import state + +__all__ = ['asynloop', 'synloop'] + +logger = get_logger(__name__) +error = logger.error + + +def asynloop(obj, connection, consumer, blueprint, hub, qos, + heartbeat, clock, hbrate=2.0, RUN=RUN): + """Non-blocking event loop consuming messages until connection is lost, + or shutdown is requested.""" + update_qos = qos.update + hbtick = connection.heartbeat_check + errors = connection.connection_errors + heartbeat = connection.get_heartbeat_interval() # negotiated + + on_task_received = obj.create_task_handler() + + if heartbeat and connection.supports_heartbeats: + hub.call_repeatedly(heartbeat / hbrate, hbtick, hbrate) + + consumer.callbacks = [on_task_received] + consumer.consume() + obj.on_ready() + obj.controller.register_with_event_loop(hub) + obj.register_with_event_loop(hub) + + # did_start_ok will verify that pool processes were able to start, + # but this will only work the first time we start, as + # maxtasksperchild will mess up metrics. 
+ if not obj.restart_count and not obj.pool.did_start_ok(): + raise WorkerLostError('Could not start worker processes') + + # FIXME: Use loop.run_forever + # Tried and works, but no time to test properly before release. + hub.propagate_errors = errors + loop = hub.create_loop() + + try: + while blueprint.state == RUN and obj.connection: + # shutdown if signal handlers told us to. + if state.should_stop: + raise WorkerShutdown() + elif state.should_terminate: + raise WorkerTerminate() + + # We only update QoS when there is no more messages to read. + # This groups together qos calls, and makes sure that remote + # control commands will be prioritized over task messages. + if qos.prev != qos.value: + update_qos() + + try: + next(loop) + except StopIteration: + loop = hub.create_loop() + finally: + try: + hub.reset() + except Exception as exc: + error( + 'Error cleaning up after event loop: %r', exc, exc_info=1, + ) + + +def synloop(obj, connection, consumer, blueprint, hub, qos, + heartbeat, clock, hbrate=2.0, **kwargs): + """Fallback blocking event loop for transports that doesn't support AIO.""" + + on_task_received = obj.create_task_handler() + consumer.register_callback(on_task_received) + consumer.consume() + + obj.on_ready() + + while blueprint.state == RUN and obj.connection: + state.maybe_shutdown() + if qos.prev != qos.value: + qos.update() + try: + connection.drain_events(timeout=2.0) + except socket.timeout: + pass + except socket.error: + if blueprint.state == RUN: + raise diff --git a/celery/worker/pidbox.py b/celery/worker/pidbox.py new file mode 100644 index 0000000..4a5ae17 --- /dev/null +++ b/celery/worker/pidbox.py @@ -0,0 +1,116 @@ +from __future__ import absolute_import + +import socket +import threading + +from kombu.common import ignore_errors +from kombu.utils.encoding import safe_str + +from celery.datastructures import AttributeDict +from celery.utils.log import get_logger + +from . import control + +__all__ = ['Pidbox', 'gPidbox'] + +logger = get_logger(__name__) +debug, error, info = logger.debug, logger.error, logger.info + + +class Pidbox(object): + consumer = None + + def __init__(self, c): + self.c = c + self.hostname = c.hostname + self.node = c.app.control.mailbox.Node( + safe_str(c.hostname), + handlers=control.Panel.data, + state=AttributeDict(app=c.app, hostname=c.hostname, consumer=c), + ) + self._forward_clock = self.c.app.clock.forward + + def on_message(self, body, message): + # just increase clock as clients usually don't + # have a valid clock to adjust with. 
+ self._forward_clock() + try: + self.node.handle_message(body, message) + except KeyError as exc: + error('No such control command: %s', exc) + except Exception as exc: + error('Control command error: %r', exc, exc_info=True) + self.reset() + + def start(self, c): + self.node.channel = c.connection.channel() + self.consumer = self.node.listen(callback=self.on_message) + self.consumer.on_decode_error = c.on_decode_error + + def on_stop(self): + pass + + def stop(self, c): + self.on_stop() + self.consumer = self._close_channel(c) + + def reset(self): + """Sets up the process mailbox.""" + self.stop(self.c) + self.start(self.c) + + def _close_channel(self, c): + if self.node and self.node.channel: + ignore_errors(c, self.node.channel.close) + + def shutdown(self, c): + self.on_stop() + if self.consumer: + debug('Cancelling broadcast consumer...') + ignore_errors(c, self.consumer.cancel) + self.stop(self.c) + + +class gPidbox(Pidbox): + _node_shutdown = None + _node_stopped = None + _resets = 0 + + def start(self, c): + c.pool.spawn_n(self.loop, c) + + def on_stop(self): + if self._node_stopped: + self._node_shutdown.set() + debug('Waiting for broadcast thread to shutdown...') + self._node_stopped.wait() + self._node_stopped = self._node_shutdown = None + + def reset(self): + self._resets += 1 + + def _do_reset(self, c, connection): + self._close_channel(c) + self.node.channel = connection.channel() + self.consumer = self.node.listen(callback=self.on_message) + self.consumer.consume() + + def loop(self, c): + resets = [self._resets] + shutdown = self._node_shutdown = threading.Event() + stopped = self._node_stopped = threading.Event() + try: + with c.connect() as connection: + + info('pidbox: Connected to %s.', connection.as_uri()) + self._do_reset(c, connection) + while not shutdown.is_set() and c.connection: + if resets[0] < self._resets: + resets[0] += 1 + self._do_reset(c, connection) + try: + connection.drain_events(timeout=1.0) + except socket.timeout: + pass + finally: + stopped.set() diff --git a/celery/worker/state.py b/celery/worker/state.py new file mode 100644 index 0000000..8abaa5d --- /dev/null +++ b/celery/worker/state.py @@ -0,0 +1,238 @@ +# -*- coding: utf-8 -*- +""" + celery.worker.state + ~~~~~~~~~~~~~~~~~~~ + + Internal worker state (global) + + This includes the currently active and reserved tasks, + statistics, and revoked tasks. + +""" +from __future__ import absolute_import + +import os +import sys +import platform +import shelve +import zlib + +from kombu.serialization import pickle, pickle_protocol +from kombu.utils import cached_property + +from celery import __version__ +from celery.datastructures import LimitedSet +from celery.exceptions import WorkerShutdown, WorkerTerminate +from celery.five import Counter + +__all__ = ['SOFTWARE_INFO', 'reserved_requests', 'active_requests', + 'total_count', 'revoked', 'task_reserved', 'maybe_shutdown', + 'task_accepted', 'task_ready', 'task_reserved', 'task_ready', + 'Persistent'] + +#: Worker software/platform information. +SOFTWARE_INFO = {'sw_ident': 'py-celery', + 'sw_ver': __version__, + 'sw_sys': platform.system()} + +#: maximum number of revokes to keep in memory. +REVOKES_MAX = 50000 + +#: how many seconds a revoke will be active before +#: being expired when the max limit has been exceeded. +REVOKE_EXPIRES = 10800 + +#: set of all reserved :class:`~celery.worker.job.Request`'s. +reserved_requests = set() + +#: set of currently active :class:`~celery.worker.job.Request`'s. 
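
The state module that begins above (and continues below) keeps the worker's
bookkeeping in plain module-level globals: two sets for reserved and active
requests plus two counters, updated by ``task_reserved``, ``task_accepted`` and
``task_ready``. A hedged sketch of that lifecycle, assuming a Celery 3.1
install where ``celery.worker.state`` is importable, with a hypothetical
stand-in request object::

    from celery.worker import state

    class Req(object):
        """Stand-in request: only .id and .name matter for this sketch."""

        def __init__(self, id, name):
            self.id, self.name = id, name

    r = Req('uuid-1', 'tasks.add')   # hypothetical task id and name
    state.task_reserved(r)           # added to reserved_requests
    state.task_accepted(r)           # added to active_requests, counters bumped
    assert r in state.reserved_requests and r in state.active_requests
    state.task_ready(r)              # discarded from both sets when done
    assert r not in state.reserved_requests and r not in state.active_requests
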
+active_requests = set() + +#: count of tasks accepted by the worker, sorted by type. +total_count = Counter() + +#: count of all tasks accepted by the worker +all_total_count = [0] + +#: the list of currently revoked tasks. Persistent if statedb set. +revoked = LimitedSet(maxlen=REVOKES_MAX, expires=REVOKE_EXPIRES) + +#: Update global state when a task has been reserved. +task_reserved = reserved_requests.add + +should_stop = False +should_terminate = False + + +def maybe_shutdown(): + if should_stop: + raise WorkerShutdown() + elif should_terminate: + raise WorkerTerminate() + + +def task_accepted(request, _all_total_count=all_total_count): + """Updates global state when a task has been accepted.""" + active_requests.add(request) + total_count[request.name] += 1 + all_total_count[0] += 1 + + +def task_ready(request): + """Updates global state when a task is ready.""" + active_requests.discard(request) + reserved_requests.discard(request) + + +C_BENCH = os.environ.get('C_BENCH') or os.environ.get('CELERY_BENCH') +C_BENCH_EVERY = int(os.environ.get('C_BENCH_EVERY') or + os.environ.get('CELERY_BENCH_EVERY') or 1000) +if C_BENCH: # pragma: no cover + import atexit + + from billiard import current_process + from celery.five import monotonic + from celery.utils.debug import memdump, sample_mem + + all_count = 0 + bench_first = None + bench_start = None + bench_last = None + bench_every = C_BENCH_EVERY + bench_sample = [] + __reserved = task_reserved + __ready = task_ready + + if current_process()._name == 'MainProcess': + @atexit.register + def on_shutdown(): + if bench_first is not None and bench_last is not None: + print('- Time spent in benchmark: {0!r}'.format( + bench_last - bench_first)) + print('- Avg: {0}'.format( + sum(bench_sample) / len(bench_sample))) + memdump() + + def task_reserved(request): # noqa + global bench_start + global bench_first + now = None + if bench_start is None: + bench_start = now = monotonic() + if bench_first is None: + bench_first = now + + return __reserved(request) + + def task_ready(request): # noqa + global all_count + global bench_start + global bench_last + all_count += 1 + if not all_count % bench_every: + now = monotonic() + diff = now - bench_start + print('- Time spent processing {0} tasks (since first ' + 'task received): ~{1:.4f}s\n'.format(bench_every, diff)) + sys.stdout.flush() + bench_start = bench_last = now + bench_sample.append(diff) + sample_mem() + return __ready(request) + + +class Persistent(object): + """This is the persistent data stored by the worker when + :option:`--statedb` is enabled. + + It currently only stores revoked task id's. 
+ + """ + storage = shelve + protocol = pickle_protocol + compress = zlib.compress + decompress = zlib.decompress + _is_open = False + + def __init__(self, state, filename, clock=None): + self.state = state + self.filename = filename + self.clock = clock + self.merge() + + def open(self): + return self.storage.open( + self.filename, protocol=self.protocol, writeback=True, + ) + + def merge(self): + self._merge_with(self.db) + + def sync(self): + self._sync_with(self.db) + self.db.sync() + + def close(self): + if self._is_open: + self.db.close() + self._is_open = False + + def save(self): + self.sync() + self.close() + + def _merge_with(self, d): + self._merge_revoked(d) + self._merge_clock(d) + return d + + def _sync_with(self, d): + self._revoked_tasks.purge() + d.update( + __proto__=3, + zrevoked=self.compress(self._dumps(self._revoked_tasks)), + clock=self.clock.forward() if self.clock else 0, + ) + return d + + def _merge_clock(self, d): + if self.clock: + d['clock'] = self.clock.adjust(d.get('clock') or 0) + + def _merge_revoked(self, d): + try: + self._merge_revoked_v3(d['zrevoked']) + except KeyError: + try: + self._merge_revoked_v2(d.pop('revoked')) + except KeyError: + pass + # purge expired items at boot + self._revoked_tasks.purge() + + def _merge_revoked_v3(self, zrevoked): + if zrevoked: + self._revoked_tasks.update(pickle.loads(self.decompress(zrevoked))) + + def _merge_revoked_v2(self, saved): + if not isinstance(saved, LimitedSet): + # (pre 3.0.18) used to be stored as a dict + return self._merge_revoked_v1(saved) + self._revoked_tasks.update(saved) + + def _merge_revoked_v1(self, saved): + add = self._revoked_tasks.add + for item in saved: + add(item) + + def _dumps(self, obj): + return pickle.dumps(obj, protocol=self.protocol) + + @property + def _revoked_tasks(self): + return self.state.revoked + + @cached_property + def db(self): + self._is_open = True + return self.open() diff --git a/celery/worker/strategy.py b/celery/worker/strategy.py new file mode 100644 index 0000000..0b0d327 --- /dev/null +++ b/celery/worker/strategy.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +""" + celery.worker.strategy + ~~~~~~~~~~~~~~~~~~~~~~ + + Task execution strategy (optimization). 
+ +""" +from __future__ import absolute_import + +import logging + +from kombu.async.timer import to_timestamp +from kombu.utils.encoding import safe_repr + +from celery.utils.log import get_logger +from celery.utils.timeutils import timezone + +from .job import Request +from .state import task_reserved + +__all__ = ['default'] + +logger = get_logger(__name__) + + +def default(task, app, consumer, + info=logger.info, error=logger.error, task_reserved=task_reserved, + to_system_tz=timezone.to_system): + hostname = consumer.hostname + eventer = consumer.event_dispatcher + Req = Request + connection_errors = consumer.connection_errors + _does_info = logger.isEnabledFor(logging.INFO) + events = eventer and eventer.enabled + send_event = eventer.send + call_at = consumer.timer.call_at + apply_eta_task = consumer.apply_eta_task + rate_limits_enabled = not consumer.disable_rate_limits + bucket = consumer.task_buckets[task.name] + handle = consumer.on_task_request + limit_task = consumer._limit_task + + def task_message_handler(message, body, ack, reject, callbacks, + to_timestamp=to_timestamp): + req = Req(body, on_ack=ack, on_reject=reject, + app=app, hostname=hostname, + eventer=eventer, task=task, + connection_errors=connection_errors, + message=message) + if req.revoked(): + return + + if _does_info: + info('Received task: %s', req) + + if events: + send_event( + 'task-received', + uuid=req.id, name=req.name, + args=safe_repr(req.args), kwargs=safe_repr(req.kwargs), + retries=req.request_dict.get('retries', 0), + eta=req.eta and req.eta.isoformat(), + expires=req.expires and req.expires.isoformat(), + ) + + if req.eta: + try: + if req.utc: + eta = to_timestamp(to_system_tz(req.eta)) + else: + eta = to_timestamp(req.eta, timezone.local) + except OverflowError as exc: + error("Couldn't convert eta %s to timestamp: %r. Task: %r", + req.eta, exc, req.info(safe=True), exc_info=True) + req.acknowledge() + else: + consumer.qos.increment_eventually() + call_at(eta, apply_eta_task, (req, ), priority=6) + else: + if rate_limits_enabled: + if bucket: + return limit_task(req, bucket, 1) + task_reserved(req) + if callbacks: + [callback() for callback in callbacks] + handle(req) + + return task_message_handler diff --git a/docs/.static/.keep b/docs/.static/.keep new file mode 100644 index 0000000..e69de29 diff --git a/docs/.templates/page.html b/docs/.templates/page.html new file mode 100644 index 0000000..e4d1c21 --- /dev/null +++ b/docs/.templates/page.html @@ -0,0 +1,21 @@ +{% extends "layout.html" %} +{% block body %} +
+ + {% if version == "3.2" or version == "4.0" %} +

+ This document is for Celery's development version, which can be + significantly different from previous releases. Get old docs here: + + 3.0. +

+ {% else %} +

+ This document describes the current stable version of Celery ({{ version }}). For development docs, + go here. +

+ {% endif %} + +
+ {{ body }} +{% endblock %} diff --git a/docs/.templates/sidebarintro.html b/docs/.templates/sidebarintro.html new file mode 100644 index 0000000..1fd55b3 --- /dev/null +++ b/docs/.templates/sidebarintro.html @@ -0,0 +1,11 @@ + + diff --git a/docs/.templates/sidebarlogo.html b/docs/.templates/sidebarlogo.html new file mode 100644 index 0000000..1fd55b3 --- /dev/null +++ b/docs/.templates/sidebarlogo.html @@ -0,0 +1,11 @@ + + diff --git a/docs/AUTHORS.txt b/docs/AUTHORS.txt new file mode 100644 index 0000000..8caea46 --- /dev/null +++ b/docs/AUTHORS.txt @@ -0,0 +1,142 @@ +========= + AUTHORS +========= +:order: sorted + +Aaron Ross +Adam Endicott +Adriano Petrich +Akira Matsuzaki +Alec Clowes +Ales Zoulek +Allan Caffee +Andrew McFague +Andrew Watts +Armin Ronacher +Ask Solem +Augusto Becciu +Balachandran C +Bartosz Ptaszynski +Ben Firshman +Brad Jasper +Branko Čibej +Brendon Crawford +Brian Bouterse +Brian Rosner +Bryan Berg +Chase Seibert +Chris Adams +Chris Angove +Chris Chamberlin +Chris Rose +Chris St. Pierre +Chris Streeter +Christoph Burgmer +Christopher Peplin +Clay Gerrard +Dan McGee +Daniel Hepper +Daniel Lundin +Daniel Watkins +David Arthur +David Cramer +David Miller +David Strauss +David White +Eran Rundstein +Felix Berger +Florian Apolloner +Frédéric Junod +Gert Van Gool +Greg Haskins +Greg Taylor +Grégoire Cachet +Gunnlaugur Thor Briem +Hari +Harm Verhagen +Honza Kral +Ian A Wilson +Ignas Mikalajūnas +Ionel Maries Cristian +Ionut Turturica +Iurii Kriachko +Ivan Metzlar +Jannis Leidel +Jason Baker +Jeff Balogh +Jeff Terrace +Jerzy Kozera +Jesper Noehr +John Watson +Jonas Haag +Jonas Obrist +Jonatan Heyman +Joshua Ginsberg +Juan Ignacio Catalano +Juarez Bochi +Jude Nagurney +Julien Poissonnier +Keith Perkins +Kevin Tran +Kornelijus Survila +Leo Dirac +Luis Clara Gomez +Lukas Linhart +Luke Zapart +Marcin Kuźmiński +Marcin Lulek +Mark Hellewell +Mark Lavin +Mark Stover +Mark Thurman +Martin Galpin +Martin Melin +Matt Williamson +Matthew J Morrison +Matthew Miller +Mauro Rocco +Maxim Bodyansky +Mher Movsisyan +Michael Elsdoerfer +Michael Fladischer +Miguel Hernandez Martos +Mikhail Gusarov +Mikhail Korobov +Mitar +Neil Chintomby +Noah Kantrowitz +Norman Richards +Patrick Altman +Piotr Sikora +Remy Noel +Reza Lotun +Roberto Gaiser +Roger Hu +Rune Halvorsen +Ryan P. Kelly +Ryan Petrello +Sam Cooke +Sean Creeley +Sean O'Connor +Seong Won Mun +Simon Josi +Steeve Morin +Stefan Kjartansson +Steven Skoczen +Tayfun Sen +Thomas Johansson +Timo Sugliani +Travis Swicegood +Vincent Driessen +Vitaly Babiy +Vladimir Kryachko +Wes Turner +Wes Winham +Yury V. Zaytsev +jpellerin +kuno +lookfwd +sdcooke +Łukasz Langa +Łukasz Oleś diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..e7c49d1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,81 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command-line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d .build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
+ +.PHONY: help clean html web pickle htmlhelp latex changes linkcheck + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " changes to make an overview over all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + +clean: + -rm -rf .build/* + +html: + mkdir -p .build/html .build/doctrees + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) .build/html + @echo + @echo "Build finished. The HTML pages are in .build/html." + +coverage: + mkdir -p .build/coverage .build/doctrees + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) .build/coverage + @echo + @echo "Build finished." + +pickle: + mkdir -p .build/pickle .build/doctrees + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) .build/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +web: pickle + +json: + mkdir -p .build/json .build/doctrees + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) .build/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + mkdir -p .build/htmlhelp .build/doctrees + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) .build/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in .build/htmlhelp." + +latex: + mkdir -p .build/latex .build/doctrees + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) .build/latex + @echo + @echo "Build finished; the LaTeX files are in .build/latex." + @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ + "run these through (pdf)latex." + +changes: + mkdir -p .build/changes .build/doctrees + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) .build/changes + @echo + @echo "The overview file is in .build/changes." + +linkcheck: + mkdir -p .build/linkcheck .build/doctrees + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) .build/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in .build/linkcheck/output.txt." diff --git a/docs/THANKS b/docs/THANKS new file mode 100644 index 0000000..7150333 --- /dev/null +++ b/docs/THANKS @@ -0,0 +1,6 @@ +Thanks to Rune Halvorsen for the name. +Thanks to Anton Tsigularov for the previous name (crunchy) + which we had to abandon because of an existing project with that name. +Thanks to Armin Ronacher for the Sphinx theme. +Thanks to Brian K. Jones for bunny.py (http://github.com/bkjones/bunny), the + tool that inspired 'celery amqp'. diff --git a/docs/_ext/applyxrefs.py b/docs/_ext/applyxrefs.py new file mode 100644 index 0000000..deed5d9 --- /dev/null +++ b/docs/_ext/applyxrefs.py @@ -0,0 +1,92 @@ +"""Adds xref targets to the top of files.""" + +import sys +import os + +testing = False + +DONT_TOUCH = ( + './index.txt', +) + + +def target_name(fn): + if fn.endswith('.txt'): + fn = fn[:-4] + return '_' + fn.lstrip('./').replace('/', '-') + + +def process_file(fn, lines): + lines.insert(0, '\n') + lines.insert(0, '.. %s:\n' % target_name(fn)) + try: + f = open(fn, 'w') + except IOError: + print("Can't open %s for writing. Not touching it." % fn) + return + try: + f.writelines(lines) + except IOError: + print("Can't write to %s. Not touching it." % fn) + finally: + f.close() + + +def has_target(fn): + try: + f = open(fn, 'r') + except IOError: + print("Can't open %s. Not touching it." 
% fn) + return (True, None) + readok = True + try: + lines = f.readlines() + except IOError: + print("Can't read %s. Not touching it." % fn) + readok = False + finally: + f.close() + if not readok: + return (True, None) + + #print fn, len(lines) + if len(lines) < 1: + print("Not touching empty file %s." % fn) + return (True, None) + if lines[0].startswith('.. _'): + return (True, None) + return (False, lines) + + +def main(argv=None): + if argv is None: + argv = sys.argv + + if len(argv) == 1: + argv.extend('.') + + files = [] + for root in argv[1:]: + for (dirpath, dirnames, filenames) in os.walk(root): + files.extend([(dirpath, f) for f in filenames]) + files.sort() + files = [os.path.join(p, fn) for p, fn in files if fn.endswith('.txt')] + #print files + + for fn in files: + if fn in DONT_TOUCH: + print("Skipping blacklisted file %s." % fn) + continue + + target_found, lines = has_target(fn) + if not target_found: + if testing: + print '%s: %s' % (fn, lines[0]), + else: + print "Adding xref to %s" % fn + process_file(fn, lines) + else: + print "Skipping %s: already has a xref" % fn + +if __name__ == '__main__': + sys.exit(main()) diff --git a/docs/_ext/celerydocs.py b/docs/_ext/celerydocs.py new file mode 100644 index 0000000..2cbc97f --- /dev/null +++ b/docs/_ext/celerydocs.py @@ -0,0 +1,151 @@ +from docutils import nodes + +from sphinx.environment import NoUri + +APPATTRS = { + 'amqp': 'celery.app.amqp.AMQP', + 'backend': 'celery.backends.base.BaseBackend', + 'control': 'celery.app.control.Control', + 'events': 'celery.events.Events', + 'loader': 'celery.app.loaders.base.BaseLoader', + 'log': 'celery.app.log.Logging', + 'pool': 'kombu.connection.ConnectionPool', + 'tasks': 'celery.app.registry.Registry', + + 'AsyncResult': 'celery.result.AsyncResult', + 'GroupResult': 'celery.result.GroupResult', + 'Worker': 'celery.apps.worker.Worker', + 'WorkController': 'celery.worker.WorkController', + 'Beat': 'celery.apps.beat.Beat', + 'Task': 'celery.app.task.Task', + 'send_task': 'celery.Celery.send_task', + 'connection': 'celery.Celery.connection', +} + +ABBRS = { + 'Celery': 'celery.Celery', +} + +ABBR_EMPTY = { + 'exc': 'celery.exceptions', +} +DEFAULT_EMPTY = 'celery.Celery' + + +def typeify(S, type): + if type in ('meth', 'func'): + return S + '()' + return S + + +def shorten(S, newtarget, src_dict): + if S.startswith('@-'): + return S[2:] + elif S.startswith('@'): + if src_dict is APPATTRS: + return '.'.join([pkg_of(newtarget), S[1:]]) + return S[1:] + return S + + +def get_abbr(pre, rest, type): + if pre: + for d in APPATTRS, ABBRS: + try: + return d[pre], rest, d + except KeyError: + pass + raise KeyError(pre) + else: + for d in APPATTRS, ABBRS: + try: + return d[rest], '', d + except KeyError: + pass + return ABBR_EMPTY.get(type, DEFAULT_EMPTY), rest, ABBR_EMPTY + + +def resolve(S, type): + if S.startswith('@'): + S = S.lstrip('@-') + try: + pre, rest = S.split('.', 1) + except ValueError: + pre, rest = '', S + + target, rest, src = get_abbr(pre, rest, type) + return '.'.join([target, rest]) if rest else target, src + return S, None + + +def pkg_of(module_fqdn): + return module_fqdn.split('.', 1)[0] + + +def basename(module_fqdn): + return module_fqdn.lstrip('@').rsplit('.', -1)[-1] + + +def modify_textnode(T, newtarget, node, src_dict, type): + src = node.children[0].rawsource + return nodes.Text( + (typeify(basename(T), type) if '~' in src + else typeify(shorten(T, newtarget, src_dict), type)), + src, + ) + + +def maybe_resolve_abbreviations(app, env, node, contnode): + domainname = 
node.get('refdomain') + target = node['reftarget'] + type = node['reftype'] + if target.startswith('@'): + newtarget, src_dict = resolve(target, type) + node['reftarget'] = newtarget + # shorten text if '~' is not enabled. + if len(contnode) and isinstance(contnode[0], nodes.Text): + contnode[0] = modify_textnode(target, newtarget, node, + src_dict, type) + if domainname: + try: + domain = env.domains[node.get('refdomain')] + except KeyError: + raise NoUri + return domain.resolve_xref(env, node['refdoc'], app.builder, + type, newtarget, + node, contnode) + + +def setup(app): + app.connect('missing-reference', maybe_resolve_abbreviations) + + app.add_crossref_type( + directivename='setting', + rolename='setting', + indextemplate='pair: %s; setting', + ) + app.add_crossref_type( + directivename='sig', + rolename='sig', + indextemplate='pair: %s; sig', + ) + app.add_crossref_type( + directivename='state', + rolename='state', + indextemplate='pair: %s; state', + ) + app.add_crossref_type( + directivename='control', + rolename='control', + indextemplate='pair: %s; control', + ) + app.add_crossref_type( + directivename='signal', + rolename='signal', + indextemplate='pair: %s; signal', + ) + app.add_crossref_type( + directivename='event', + rolename='event', + indextemplate='pair: %s; event', + ) diff --git a/docs/_ext/githubsphinx.py b/docs/_ext/githubsphinx.py new file mode 100644 index 0000000..4553f03 --- /dev/null +++ b/docs/_ext/githubsphinx.py @@ -0,0 +1,110 @@ +"""Stolen from sphinxcontrib-issuetracker. + +Had to modify this as the original will make one Github API request +per issue, which is not at all needed if we just want to link to issues. + +""" +from __future__ import absolute_import + +import re +import sys + +from collections import namedtuple + +from docutils import nodes +from docutils.transforms import Transform +from sphinx.roles import XRefRole +from sphinx.addnodes import pending_xref + +URL = 'https://github.com/{project}/issues/{issue_id}' + +Issue = namedtuple('Issue', ('id', 'title', 'url')) + +if sys.version_info[0] == 3: + str_t = text_t = str +else: + str_t = basestring + text_t = unicode + + +class IssueRole(XRefRole): + innernodeclass = nodes.inline + + +class Issues(Transform): + default_priority = 999 + + def apply(self): + config = self.document.settings.env.config + github_project = config.github_project + issue_pattern = config.github_issue_pattern + if isinstance(issue_pattern, str_t): + issue_pattern = re.compile(issue_pattern) + for node in self.document.traverse(nodes.Text): + parent = node.parent + if isinstance(parent, (nodes.literal, nodes.FixedTextElement)): + continue + text = text_t(node) + new_nodes = [] + last_issue_ref_end = 0 + for match in issue_pattern.finditer(text): + head = text[last_issue_ref_end:match.start()] + if head: + new_nodes.append(nodes.Text(head)) + last_issue_ref_end = match.end() + issuetext = match.group(0) + issue_id = match.group(1) + refnode = pending_xref() + refnode['reftarget'] = issue_id + refnode['reftype'] = 'issue' + refnode['github_project'] = github_project + reftitle = issuetext + refnode.append(nodes.inline( + issuetext, reftitle, classes=['xref', 'issue'])) + new_nodes.append(refnode) + if not new_nodes: + continue + tail = text[last_issue_ref_end:] + if tail: + new_nodes.append(nodes.Text(tail)) + parent.replace(node, new_nodes) + + +def make_issue_reference(issue, content_node): + reference = nodes.reference() + reference['refuri'] = issue.url + if issue.title: + reference['reftitle'] = issue.title + 
reference.append(content_node) + return reference + + +def resolve_issue_reference(app, env, node, contnode): + if node['reftype'] != 'issue': + return + issue_id = node['reftarget'] + project = node['github_project'] + + issue = Issue(issue_id, None, URL.format(project=project, + issue_id=issue_id)) + conttext = text_t(contnode[0]) + formatted_conttext = nodes.Text(conttext.format(issue=issue)) + formatted_contnode = nodes.inline(conttext, formatted_conttext, + classes=contnode['classes']) + return make_issue_reference(issue, formatted_contnode) + + +def init_transformer(app): + app.add_transform(Issues) + + +def setup(app): + app.require_sphinx('1.0') + app.add_role('issue', IssueRole()) + + app.add_config_value('github_project', None, 'env') + app.add_config_value('github_issue_pattern', + re.compile(r'[Ii]ssue #(\d+)'), 'env') + + app.connect(str('builder-inited'), init_transformer) + app.connect(str('missing-reference'), resolve_issue_reference) diff --git a/docs/_ext/literals_to_xrefs.py b/docs/_ext/literals_to_xrefs.py new file mode 100644 index 0000000..38dad0b --- /dev/null +++ b/docs/_ext/literals_to_xrefs.py @@ -0,0 +1,179 @@ +""" +Runs through a reST file looking for old-style literals, and helps replace them +with new-style references. +""" + +import re +import sys +import shelve + +try: + input = input +except NameError: + input = raw_input # noqa + +refre = re.compile(r'``([^`\s]+?)``') + +ROLES = ( + 'attr', + 'class', + "djadmin", + 'data', + 'exc', + 'file', + 'func', + 'lookup', + 'meth', + 'mod', + "djadminopt", + "ref", + "setting", + "term", + "tfilter", + "ttag", + + # special + "skip", +) + +ALWAYS_SKIP = [ + "NULL", + "True", + "False", +] + + +def fixliterals(fname): + data = open(fname).read() + + last = 0 + new = [] + storage = shelve.open("/tmp/literals_to_xref.shelve") + lastvalues = storage.get("lastvalues", {}) + + for m in refre.finditer(data): + + new.append(data[last:m.start()]) + last = m.end() + + line_start = data.rfind("\n", 0, m.start()) + line_end = data.find("\n", m.end()) + prev_start = data.rfind("\n", 0, line_start) + next_end = data.find("\n", line_end + 1) + + # Skip always-skip stuff + if m.group(1) in ALWAYS_SKIP: + new.append(m.group(0)) + continue + + # skip when the next line is a title + next_line = data[m.end():next_end].strip() + if next_line[0] in "!-/:-@[-`{-~" and \ + all(c == next_line[0] for c in next_line): + new.append(m.group(0)) + continue + + sys.stdout.write("\n" + "-" * 80 + "\n") + sys.stdout.write(data[prev_start + 1:m.start()]) + sys.stdout.write(colorize(m.group(0), fg="red")) + sys.stdout.write(data[m.end():next_end]) + sys.stdout.write("\n\n") + + replace_type = None + while replace_type is None: + replace_type = input( + colorize("Replace role: ", fg="yellow")).strip().lower() + if replace_type and replace_type not in ROLES: + replace_type = None + + if replace_type == "": + new.append(m.group(0)) + continue + + if replace_type == "skip": + new.append(m.group(0)) + ALWAYS_SKIP.append(m.group(1)) + continue + + default = lastvalues.get(m.group(1), m.group(1)) + if default.endswith("()") and \ + replace_type in ("class", "func", "meth"): + default = default[:-2] + replace_value = input( + colorize("Text [", fg="yellow") + + default + colorize("]: ", fg="yellow"), + ).strip() + if not replace_value: + replace_value = default + new.append(":%s:`%s`" % (replace_type, replace_value)) + lastvalues[m.group(1)] = replace_value + + new.append(data[last:]) + open(fname, "w").write("".join(new)) + + storage["lastvalues"] = 
lastvalues + storage.close() + + +def colorize(text='', opts=(), **kwargs): + """ + Returns your text, enclosed in ANSI graphics codes. + + Depends on the keyword arguments 'fg' and 'bg', and the contents of + the opts tuple/list. + + Returns the RESET code if no parameters are given. + + Valid colors: + 'black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan', 'white' + + Valid options: + 'bold' + 'underscore' + 'blink' + 'reverse' + 'conceal' + 'noreset' - string will not be auto-terminated with the RESET code + + Examples: + colorize('hello', fg='red', bg='blue', opts=('blink',)) + colorize() + colorize('goodbye', opts=('underscore',)) + print colorize('first line', fg='red', opts=('noreset',)) + print 'this should be red too' + print colorize('and so should this') + print 'this should not be red' + """ + color_names = ('black', 'red', 'green', 'yellow', + 'blue', 'magenta', 'cyan', 'white') + foreground = dict([(color_names[x], '3%s' % x) for x in range(8)]) + background = dict([(color_names[x], '4%s' % x) for x in range(8)]) + + RESET = '0' + opt_dict = {'bold': '1', + 'underscore': '4', + 'blink': '5', + 'reverse': '7', + 'conceal': '8'} + + text = str(text) + code_list = [] + if text == '' and len(opts) == 1 and opts[0] == 'reset': + return '\x1b[%sm' % RESET + for k, v in kwargs.items(): + if k == 'fg': + code_list.append(foreground[v]) + elif k == 'bg': + code_list.append(background[v]) + for o in opts: + if o in opt_dict: + code_list.append(opt_dict[o]) + if 'noreset' not in opts: + text = text + '\x1b[%sm' % RESET + return ('\x1b[%sm' % ';'.join(code_list)) + text + +if __name__ == '__main__': + try: + fixliterals(sys.argv[1]) + except (KeyboardInterrupt, SystemExit): + print diff --git a/docs/_theme/celery/static/celery.css_t b/docs/_theme/celery/static/celery.css_t new file mode 100644 index 0000000..807081a --- /dev/null +++ b/docs/_theme/celery/static/celery.css_t @@ -0,0 +1,401 @@ +/* + * celery.css_t + * ~~~~~~~~~~~~ + * + * :copyright: Copyright 2010 by Armin Ronacher. + * :license: BSD, see LICENSE for details. 
+ */ + +{% set page_width = 940 %} +{% set sidebar_width = 220 %} +{% set body_font_stack = 'Optima, Segoe, "Segoe UI", Candara, Calibri, Arial, sans-serif' %} +{% set headline_font_stack = 'Futura, "Trebuchet MS", Arial, sans-serif' %} +{% set code_font_stack = "'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace" %} + +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: {{ body_font_stack }}; + font-size: 17px; + background-color: white; + color: #000; + margin: 30px 0 0 0; + padding: 0; +} + +div.document { + width: {{ page_width }}px; + margin: 0 auto; +} + +div.deck { + font-size: 18px; +} + +p.developmentversion { + color: red; +} + +div.related { + width: {{ page_width - 20 }}px; + padding: 5px 10px; + background: #F2FCEE; + margin: 15px auto 15px auto; +} + +div.documentwrapper { + float: left; + width: 100%; +} + +div.bodywrapper { + margin: 0 0 0 {{ sidebar_width }}px; +} + +div.sphinxsidebar { + width: {{ sidebar_width }}px; +} + +hr { + border: 1px solid #B1B4B6; +} + +div.body { + background-color: #ffffff; + color: #3E4349; + padding: 0 30px 0 30px; +} + +img.celerylogo { + padding: 0 0 10px 10px; + float: right; +} + +div.footer { + width: {{ page_width - 15 }}px; + margin: 10px auto 30px auto; + padding-right: 15px; + font-size: 14px; + color: #888; + text-align: right; +} + +div.footer a { + color: #888; +} + +div.sphinxsidebar a { + color: #444; + text-decoration: none; + border-bottom: 1px dashed #DCF0D5; +} + +div.sphinxsidebar a:hover { + border-bottom: 1px solid #999; +} + +div.sphinxsidebar { + font-size: 14px; + line-height: 1.5; +} + +div.sphinxsidebarwrapper { + padding: 7px 10px; +} + +div.sphinxsidebarwrapper p.logo { + padding: 0 0 20px 0; + margin: 0; +} + +div.sphinxsidebar h3, +div.sphinxsidebar h4 { + font-family: {{ headline_font_stack }}; + color: #444; + font-size: 24px; + font-weight: normal; + margin: 0 0 5px 0; + padding: 0; +} + +div.sphinxsidebar h4 { + font-size: 20px; +} + +div.sphinxsidebar h3 a { + color: #444; +} + +div.sphinxsidebar p.logo a, +div.sphinxsidebar h3 a, +div.sphinxsidebar p.logo a:hover, +div.sphinxsidebar h3 a:hover { + border: none; +} + +div.sphinxsidebar p { + color: #555; + margin: 10px 0; +} + +div.sphinxsidebar ul { + margin: 10px 0; + padding: 0; + color: #000; +} + +div.sphinxsidebar input { + border: 1px solid #ccc; + font-family: {{ body_font_stack }}; + font-size: 1em; +} + +/* -- body styles ----------------------------------------------------------- */ + +a { + color: #348613; + text-decoration: underline; +} + +a:hover { + color: #59B833; + text-decoration: underline; +} + +div.body h1, +div.body h2, +div.body h3, +div.body h4, +div.body h5, +div.body h6 { + font-family: {{ headline_font_stack }}; + font-weight: normal; + margin: 30px 0px 10px 0px; + padding: 0; +} + +div.body h1 { margin-top: 0; padding-top: 0; font-size: 200%; } +div.body h2 { font-size: 180%; } +div.body h3 { font-size: 150%; } +div.body h4 { font-size: 130%; } +div.body h5 { font-size: 100%; } +div.body h6 { font-size: 100%; } + +div.body h1 a.toc-backref, +div.body h2 a.toc-backref, +div.body h3 a.toc-backref, +div.body h4 a.toc-backref, +div.body h5 a.toc-backref, +div.body h6 a.toc-backref { + color: inherit!important; + text-decoration: none; +} + +a.headerlink { + color: #ddd; + padding: 0 4px; + text-decoration: none; +} + +a.headerlink:hover { + color: #444; + background: #eaeaea; +} + +div.body p, div.body dd, div.body li { + 
line-height: 1.4em; +} + +div.admonition { + background: #fafafa; + margin: 20px -30px; + padding: 10px 30px; + border-top: 1px solid #ccc; + border-bottom: 1px solid #ccc; +} + +div.admonition p.admonition-title { + font-family: {{ headline_font_stack }}; + font-weight: normal; + font-size: 24px; + margin: 0 0 10px 0; + padding: 0; + line-height: 1; +} + +div.admonition p.last { + margin-bottom: 0; +} + +div.highlight{ + background-color: white; +} + +dt:target, .highlight { + background: #FAF3E8; +} + +div.note { + background-color: #eee; + border: 1px solid #ccc; +} + +div.seealso { + background-color: #ffc; + border: 1px solid #ff6; +} + +div.topic { + background-color: #eee; +} + +div.warning { + background-color: #ffe4e4; + border: 1px solid #f66; +} + +p.admonition-title { + display: inline; +} + +p.admonition-title:after { + content: ":"; +} + +pre, tt { + font-family: {{ code_font_stack }}; + font-size: 0.9em; +} + +img.screenshot { +} + +tt.descname, tt.descclassname { + font-size: 0.95em; +} + +tt.descname { + padding-right: 0.08em; +} + +img.screenshot { + -moz-box-shadow: 2px 2px 4px #eee; + -webkit-box-shadow: 2px 2px 4px #eee; + box-shadow: 2px 2px 4px #eee; +} + +table.docutils { + border: 1px solid #888; + -moz-box-shadow: 2px 2px 4px #eee; + -webkit-box-shadow: 2px 2px 4px #eee; + box-shadow: 2px 2px 4px #eee; +} + +table.docutils td, table.docutils th { + border: 1px solid #888; + padding: 0.25em 0.7em; +} + +table.field-list, table.footnote { + border: none; + -moz-box-shadow: none; + -webkit-box-shadow: none; + box-shadow: none; +} + +table.footnote { + margin: 15px 0; + width: 100%; + border: 1px solid #eee; + background: #fdfdfd; + font-size: 0.9em; +} + +table.footnote + table.footnote { + margin-top: -15px; + border-top: none; +} + +table.field-list th { + padding: 0 0.8em 0 0; +} + +table.field-list td { + padding: 0; +} + +table.footnote td.label { + width: 0px; + padding: 0.3em 0 0.3em 0.5em; +} + +table.footnote td { + padding: 0.3em 0.5em; +} + +dl { + margin: 0; + padding: 0; +} + +dl dd { + margin-left: 30px; +} + +blockquote { + margin: 0 0 0 30px; + padding: 0; +} + +ul { + margin: 10px 0 10px 30px; + padding: 0; +} + +pre { + background: #F0FFEB; + padding: 7px 10px; + margin: 15px 0; + border: 1px solid #C7ECB8; + border-radius: 2px; + -moz-border-radius: 2px; + -webkit-border-radius: 2px; + line-height: 1.3em; +} + +tt { + background: #F0FFEB; + color: #222; + /* padding: 1px 2px; */ +} + +tt.xref, a tt { + background: #F0FFEB; + border-bottom: 1px solid white; +} + +a.reference { + text-decoration: none; + border-bottom: 1px dashed #DCF0D5; +} + +a.reference:hover { + border-bottom: 1px solid #6D4100; +} + +a.footnote-reference { + text-decoration: none; + font-size: 0.7em; + vertical-align: top; + border-bottom: 1px dashed #DCF0D5; +} + +a.footnote-reference:hover { + border-bottom: 1px solid #6D4100; +} + +a:hover tt { + background: #EEE; +} diff --git a/docs/_theme/celery/theme.conf b/docs/_theme/celery/theme.conf new file mode 100644 index 0000000..9ad052c --- /dev/null +++ b/docs/_theme/celery/theme.conf @@ -0,0 +1,5 @@ +[theme] +inherit = basic +stylesheet = celery.css + +[options] diff --git a/docs/changelog.rst b/docs/changelog.rst new file mode 100644 index 0000000..d2d79f9 --- /dev/null +++ b/docs/changelog.rst @@ -0,0 +1,1094 @@ +.. 
_changelog: + +================ + Change history +================ + +This document contains change notes for bugfix releases in the 3.1.x series +(Cipater), please see :ref:`whatsnew-3.1` for an overview of what's +new in Celery 3.1. + +.. _version-3.1.13: + +3.1.13 +====== + +Security Fixes +-------------- + +* [Security: `CELERYSA-0002`_] Insecure default umask. + + The built-in utility used to daemonize the Celery worker service sets + an insecure umask by default (umask 0). + + This means that any files or directories created by the worker will + end up having world-writable permissions. + + Special thanks to Red Hat for originally discovering and reporting the + issue! + + This version will no longer set a default umask by default, so if unset + the umask of the parent process will be used. + +.. _`CELERYSA-0002`: + http://github.com/celery/celery/tree/master/docs/sec/CELERYSA-0002.txt + +News +---- + +- **Requirements** + + - Now depends on :ref:`Kombu 3.0.21 `. + + - Now depends on :mod:`billiard` 3.3.0.18. + + +- **App**: ``backend`` argument now also sets the :setting:`CELERY_RESULT_BACKEND` + setting. + +- **Task**: ``signature_from_request`` now propagates ``reply_to`` so that + the RPC backend works with retried tasks (Issue #2113). + +- **Task**: ``retry`` will no longer attempt to requeue the task if sending + the retry message fails. + + Unrelated exceptions being raised could cause a message loop, so it was + better to remove this behavior. + +- **Beat**: Accounts for standard 1ms drift by always waking up 0.010s + earlier. + + This will adjust the latency so that the periodic tasks will not move + 1ms after every invocation. + +- Documentation fixes + + Contributed by Yuval Greenfield, Lucas Wiman, nicholsonjf + +- **Worker**: Removed an outdated assert statement that could lead to errors + being masked (Issue #2086). + + + +.. _version-3.1.12: + +3.1.12 +====== +:release-date: 2014-06-09 10:12 P.M UTC +:release-by: Ask Solem + +- **Requirements** + + Now depends on :ref:`Kombu 3.0.19 `. + +- **App**: Connections were not being closed after fork due to an error in the + after fork handler (Issue #2055). + + This could manifest itself by causing framing errors when using RabbitMQ. + (``Unexpected frame``). + +- **Django**: ``django.setup()`` was being called too late when + using Django 1.7 (Issue #1802). + +- **Django**: Fixed problems with event timezones when using Django + (``Substantial drift``). + + Celery did not take into account that Django modifies the + ``time.timeone`` attributes and friends. + +- **Canvas**: ``Signature.link`` now works when the link option is a scalar + value (Issue #2019). + +- **Prefork pool**: Fixed race conditions for when file descriptors are + removed from the event loop. + + Fix contributed by Roger Hu. + +- **Prefork pool**: Improved solution for dividing tasks between child + processes. + + This change should improve performance when there are many child + processes, and also decrease the chance that two subsequent tasks are + written to the same child process. + +- **Worker**: Now ignores unknown event types, instead of crashing. + + Fix contributed by Illes Solt. + +- **Programs**: :program:`celery worker --detach` no longer closes open file + descriptors when :envvar:`C_FAKEFORK` is used so that the workers output + can be seen. + +- **Programs**: The default working directory for :program:`celery worker + --detach` is now the current working directory, not ``/``. 
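For illustration of the ``backend`` argument change noted earlier in these
release notes, a minimal sketch (the app name and the broker/result URLs are
placeholders, not taken from this changelog):

.. code-block:: python

    from celery import Celery

    # Passing ``backend`` here now also sets the CELERY_RESULT_BACKEND
    # setting, so no separate configuration step is needed for results.
    app = Celery('proj',
                 broker='amqp://guest@localhost//',
                 backend='redis://localhost/0')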
+ +- **Canvas**: ``signature(s, app=app)`` did not upgrade serialized signatures + to their original class (``subtask_type``) when the ``app`` keyword argument + was used. + +- **Control**: The ``duplicate nodename`` warning emitted by control commands + now shows the duplicate node name. + +- **Tasks**: Can now call ``ResultSet.get()`` on a result set without members. + + Fix contributed by Alexey Kotlyarov. + +- **App**: Fixed strange traceback mangling issue for + ``app.connection_or_acquire``. + +- **Programs**: The :program:`celery multi stopwait` command is now documented + in usage. + +- **Other**: Fixed cleanup problem with ``PromiseProxy`` when an error is + raised while trying to evaluate the promise. + +- **Other**: The utility used to censor configuration values now handles + non-string keys. + + Fix contributed by Luke Pomfrey. + +- **Other**: The ``inspect conf`` command did not handle non-string keys well. + + Fix contributed by Jay Farrimond. + +- **Programs**: Fixed argument handling problem in + :program:`celery worker --detach`. + + Fix contributed by Dmitry Malinovsky. + +- **Programs**: :program:`celery worker --detach` did not forward working + directory option (Issue #2003). + +- **Programs**: :program:`celery inspect registered` no longer includes + the list of built-in tasks. + +- **Worker**: The ``requires`` attribute for boot steps were not being handled + correctly (Issue #2002). + +- **Eventlet**: The eventlet pool now supports the ``pool_grow`` and + ``pool_shrink`` remote control commands. + + Contributed by Mher Movsisyan. + +- **Eventlet**: The eventlet pool now implements statistics for + :program:``celery inspect stats``. + + Contributed by Mher Movsisyan. + +- **Documentation**: Clarified ``Task.rate_limit`` behavior. + + Contributed by Jonas Haag. + +- **Documentation**: ``AbortableTask`` examples now updated to use the new + API (Issue #1993). + +- **Documentation**: The security documentation examples used an out of date + import. + + Fix contributed by Ian Dees. + +- **Init scripts**: The CentOS init scripts did not quote + :envvar:`CELERY_CHDIR`. + + Fix contributed by ffeast. + +.. _version-3.1.11: + +3.1.11 +====== +:release-date: 2014-04-16 11:00 P.M UTC +:release-by: Ask Solem + +- **Now compatible with RabbitMQ 3.3.0** + + You need to run Celery 3.1.11 or later when using RabbitMQ 3.3, + and if you use the ``librabbitmq`` module you also have to upgrade + to librabbitmq 1.5.0: + + .. code-block:: bash + + $ pip install -U librabbitmq + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.15 `. + + - Now depends on `billiard 3.3.0.17`_. + + - Bundle ``celery[librabbitmq]`` now depends on :mod:`librabbitmq` 1.5.0. + +.. _`billiard 3.3.0.17`: + https://github.com/celery/billiard/blob/master/CHANGES.txt + +- **Tasks**: The :setting:`CELERY_DEFAULT_DELIVERY_MODE` setting was being + ignored (Issue #1953). + +- **Worker**: New :option:`--heartbeat-interval` can be used to change the + time (in seconds) between sending event heartbeats. + + Contributed by Matthew Duggan and Craig Northway. + +- **App**: Fixed memory leaks occurring when creating lots of temporary + app instances (Issue #1949). + +- **MongoDB**: SSL configuration with non-MongoDB transport breaks MongoDB + results backend (Issue #1973). + + Fix contributed by Brian Bouterse. + +- **Logging**: The color formatter accidentally modified ``record.msg`` + (Issue #1939). 
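As a hedged sketch of the ``pool_grow``/``pool_shrink`` remote control
commands mentioned above, assuming an already configured ``app`` instance:

.. code-block:: python

    # Both calls are broadcast control commands; with the eventlet pool
    # they adjust the number of green threads on the receiving workers.
    app.control.pool_grow(2)    # add two
    app.control.pool_shrink(1)  # remove one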
+ +- **Results**: Fixed problem with task trails being stored multiple times, + causing ``result.collect()`` to hang (Issue #1936, Issue #1943). + +- **Results**: ``ResultSet`` now implements a ``.backend`` attribute for + compatibility with ``AsyncResult``. + +- **Results**: ``.forget()`` now also clears the local cache. + +- **Results**: Fixed problem with multiple calls to ``result._set_cache`` + (Issue #1940). + +- **Results**: ``join_native`` populated result cache even if disabled. + +- **Results**: The YAML result serializer should now be able to handle storing + exceptions. + +- **Worker**: No longer sends task error emails for expected errors (in + ``@task(throws=(..., )))``. + +- **Canvas**: Fixed problem with exception deserialization when using + the JSON serializer (Issue #1987). + +- **Eventlet**: Fixes crash when ``celery.contrib.batches`` attempted to + cancel a non-existing timer (Issue #1984). + +- Can now import ``celery.version_info_t``, and ``celery.five`` (Issue #1968). + + +.. _version-3.1.10: + +3.1.10 +====== +:release-date: 2014-03-22 09:40 P.M UTC +:release-by: Ask Solem + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.14 `. + +- **Results**: + + Reliability improvements to the SQLAlchemy database backend. Previously the + connection from the MainProcess was improperly shared with the workers. + (Issue #1786) + +- **Redis:** Important note about events (Issue #1882). + + There is a new transport option for Redis that enables monitors + to filter out unwanted events. Enabling this option in the workers + will increase performance considerably: + + .. code-block:: python + + BROKER_TRANSPORT_OPTIONS = {'fanout_patterns': True} + + Enabling this option means that your workers will not be able to see + workers with the option disabled (or is running an older version of + Celery), so if you do enable it then make sure you do so on all + nodes. + + See :ref:`redis-caveats-fanout-patterns`. + + This will be the default in Celery 3.2. + +- **Results**: The :class:`@AsyncResult` object now keeps a local cache + of the final state of the task. + + This means that the global result cache can finally be disabled, + and you can do so by setting :setting:`CELERY_MAX_CACHED_RESULTS` to + :const:`-1`. The lifetime of the cache will then be bound to the + lifetime of the result object, which will be the default behavior + in Celery 3.2. + +- **Events**: The "Substantial drift" warning message is now logged once + per node name only (Issue #1802). + +- **Worker**: Ability to use one log file per child process when using the + prefork pool. + + This can be enabled by using the new ``%i`` and ``%I`` format specifiers + for the log file name. See :ref:`worker-files-process-index`. + +- **Redis**: New experimental chord join implementation. + + This is an optimization for chords when using the Redis result backend, + where the join operation is now considerably faster and using less + resources than the previous strategy. + + The new option can be set in the result backend URL: + + CELERY_RESULT_BACKEND = 'redis://localhost?new_join=1' + + This must be enabled manually as it's incompatible + with workers and clients not using it, so be sure to enable + the option in all clients and workers if you decide to use it. + +- **Multi**: With ``-opt:index`` (e.g. :option:`-c:1`) the index now always refers + to the position of a node in the argument list. 
+ + This means that referring to a number will work when specifying a list + of node names and not just for a number range: + + .. code-block:: bash + + celery multi start A B C D -c:1 4 -c:2-4 8 + + In this example ``1`` refers to node A (as it's the first node in the + list). + +- **Signals**: The sender argument to ``Signal.connect`` can now be a proxy + object, which means that it can be used with the task decorator + (Issue #1873). + +- **Task**: A regression caused the ``queue`` argument to ``Task.retry`` to be + ignored (Issue #1892). + +- **App**: Fixed error message for :meth:`~@Celery.config_from_envvar`. + + Fix contributed by Dmitry Malinovsky. + +- **Canvas**: Chords can now contain a group of other chords (Issue #1921). + +- **Canvas**: Chords can now be combined when using the amqp result backend + (a chord where the callback is also a chord). + +- **Canvas**: Calling ``result.get()`` for a chain task will now complete + even if one of the tasks in the chain is ``ignore_result=True`` + (Issue #1905). + +- **Canvas**: Worker now also logs chord errors. + +- **Canvas**: A chord task raising an exception will now result in + any errbacks (``link_error``) to the chord callback to also be called. + +- **Results**: Reliability improvements to the SQLAlchemy database backend + (Issue #1786). + + Previously the connection from the ``MainProcess`` was improperly + inherited by child processes. + + Fix contributed by Ionel Cristian Mărieș. + +- **Task**: Task callbacks and errbacks are now called using the group + primitive. + +- **Task**: ``Task.apply`` now properly sets ``request.headers`` + (Issue #1874). + +- **Worker**: Fixed ``UnicodeEncodeError`` occuring when worker is started + by `supervisord`. + + Fix contributed by Codeb Fan. + +- **Beat**: No longer attempts to upgrade a newly created database file + (Issue #1923). + +- **Beat**: New setting :setting:``CELERYBEAT_SYNC_EVERY`` can be be used + to control file sync by specifying the number of tasks to send between + each sync. + + Contributed by Chris Clark. + +- **Commands**: :program:`celery inspect memdump` no longer crashes + if the :mod:`psutil` module is not installed (Issue #1914). + +- **Worker**: Remote control commands now always accepts json serialized + messages (Issue #1870). + +- **Worker**: Gossip will now drop any task related events it receives + by mistake (Issue #1882). + + +.. _version-3.1.9: + +3.1.9 +===== +:release-date: 2014-02-10 06:43 P.M UTC +:release-by: Ask Solem + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.12 `. + +- **Prefork pool**: Better handling of exiting child processes. + + Fix contributed by Ionel Cristian Mărieș. + +- **Prefork pool**: Now makes sure all file descriptors are removed + from the hub when a process is cleaned up. + + Fix contributed by Ionel Cristian Mărieș. + +- **New Sphinx extension**: for autodoc documentation of tasks: + :mod:`celery.contrib.spinx` (Issue #1833). + +- **Django**: Now works with Django 1.7a1. + +- **Task**: Task.backend is now a property that forwards to ``app.backend`` + if no custom backend has been specified for the task (Issue #1821). + +- **Generic init scripts**: Fixed bug in stop command. + + Fix contributed by Rinat Shigapov. + +- **Generic init scripts**: Fixed compatibility with GNU :manpage:`stat`. + + Fix contributed by Paul Kilgo. + +- **Generic init scripts**: Fixed compatibility with the minimal + :program:`dash` shell (Issue #1815). 
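To make the task autodoc note above concrete, a sketch of how the extension
(``celery.contrib.sphinx``) would typically be enabled in a Sphinx
``conf.py``; the exact extension list is an example only:

.. code-block:: python

    # docs/conf.py (sketch): with this extension enabled, autodoc
    # understands Celery task objects when documenting modules.
    extensions = [
        'sphinx.ext.autodoc',
        'celery.contrib.sphinx',
    ]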
+ +- **Commands**: The :program:`celery amqp basic.publish` command was not + working properly. + + Fix contributed by Andrey Voronov. + +- **Commands**: Did no longer emit an error message if the pidfile exists + and the process is still alive (Issue #1855). + +- **Commands**: Better error message for missing arguments to preload + options (Issue #1860). + +- **Commands**: :program:`celery -h` did not work because of a bug in the + argument parser (Issue #1849). + +- **Worker**: Improved error message for message decoding errors. + +- **Time**: Now properly parses the `Z` timezone specifier in ISO 8601 date + strings. + + Fix contributed by Martin Davidsson. + +- **Worker**: Now uses the *negotiated* heartbeat value to calculate + how often to run the heartbeat checks. + +- **Beat**: Fixed problem with beat hanging after the first schedule + iteration (Issue #1822). + + Fix contributed by Roger Hu. + +- **Signals**: The header argument to :signal:`before_task_publish` is now + always a dictionary instance so that signal handlers can add headers. + +- **Worker**: A list of message headers is now included in message related + errors. + +.. _version-3.1.8: + +3.1.8 +===== +:release-date: 2014-01-17 10:45 P.M UTC +:release-by: Ask Solem + +- **Requirements**: + + - Now depends on :ref:`Kombu 3.0.10 `. + + - Now depends on `billiard 3.3.0.14`_. + +.. _`billiard 3.3.0.14`: + https://github.com/celery/billiard/blob/master/CHANGES.txt + +- **Worker**: The event loop was not properly reinitialized at consumer restart + which would force the worker to continue with a closed ``epoll`` instance on + Linux, resulting in a crash. + +- **Events:** Fixed issue with both heartbeats and task events that could + result in the data not being kept in sorted order. + + As a result this would force the worker to log "heartbeat missed" + events even though the remote node was sending heartbeats in a timely manner. + +- **Results:** The pickle serializer no longer converts group results to tuples, + and will keep the original type (*Issue #1750*). + +- **Results:** ``ResultSet.iterate`` is now pending deprecation. + + The method will be deprecated in version 3.2 and removed in version 3.3. + + Use ``result.get(callback=)`` (or ``result.iter_native()`` where available) + instead. + +- **Worker**\|eventlet/gevent: A regression caused ``Ctrl+C`` to be ineffective + for shutdown. + +- **Redis result backend:** Now using a pipeline to store state changes + for improved performance. + + Contributed by Pepijn de Vos. + +- **Redis result backend:** Will now retry storing the result if disconnected. + +- **Worker**\|gossip: Fixed attribute error occurring when another node leaves. + + Fix contributed by Brodie Rao. + +- **Generic init scripts:** Now runs a check at startup to verify + that any configuration scripts are owned by root and that they + are not world/group writeable. + + The init script configuration is a shell script executed by root, + so this is a preventive measure to ensure that users do not + leave this file vulnerable to changes by unprivileged users. + + .. note:: + + Note that upgrading celery will not update the init scripts, + instead you need to manually copy the improved versions from the + source distribution: + https://github.com/celery/celery/tree/3.1/extra/generic-init.d + +- **Commands**: The :program:`celery purge` command now warns that the operation + will delete all tasks and prompts the user for confirmation. 
+ + A new :option:`-f` was added that can be used to disable + interactive mode. + +- **Task**: ``.retry()`` did not raise the value provided in the ``exc`` argument + when called outside of an error context (*Issue #1755*). + +- **Commands:** The :program:`celery multi` command did not forward command + line configuration to the target workers. + + The change means that multi will forward the special ``--`` argument and + configuration content at the end of the arguments line to the specified + workers. + + Example using command-line configuration to set a broker heartbeat + from :program:`celery multi`: + + .. code-block:: bash + + $ celery multi start 1 -c3 -- broker.heartbeat=30 + + Fix contributed by Antoine Legrand. + +- **Canvas:** ``chain.apply_async()`` now properly forwards execution options. + + Fix contributed by Konstantin Podshumok. + +- **Redis result backend:** Now takes ``connection_pool`` argument that can be + used to change the connection pool class/constructor. + +- **Worker:** Now truncates very long arguments and keyword arguments logged by + the pool at debug severity. + +- **Worker:** The worker now closes all open files on :sig:`SIGHUP` (regression) + (*Issue #1768*). + + Fix contributed by Brodie Rao + +- **Worker:** Will no longer accept remote control commands while the + worker startup phase is incomplete (*Issue #1741*). + +- **Commands:** The output of the event dump utility + (:program:`celery events -d`) can now be piped into other commands. + +- **Documentation:** The RabbitMQ installation instructions for OS X was + updated to use modern homebrew practices. + + Contributed by Jon Chen. + +- **Commands:** The :program:`celery inspect conf` utility now works. + +- **Commands:** The :option:`-no-color` argument was not respected by + all commands (*Issue #1799*). + +- **App:** Fixed rare bug with ``autodiscover_tasks()`` (*Issue #1797*). + +- **Distribution:** The sphinx docs will now always add the parent directory + to path so that the current celery source code is used as a basis for + API documentation (*Issue #1782*). + +- **Documentation:** Supervisord examples contained an extraneous '-' in a + `--logfile` argument example. + + Fix contributed by Mohammad Almeer. + +.. _version-3.1.7: + +3.1.7 +===== +:release-date: 2013-12-17 06:00 P.M UTC +:release-by: Ask Solem + +.. _v317-important: + +Important Notes +--------------- + +Init script security improvements +--------------------------------- + +Where the generic init scripts (for ``celeryd``, and ``celerybeat``) before +delegated the responsibility of dropping privileges to the target application, +it will now use ``su`` instead, so that the Python program is not trusted +with superuser privileges. + +This is not in reaction to any known exploit, but it will +limit the possibility of a privilege escalation bug being abused in the +future. + +You have to upgrade the init scripts manually from this directory: +https://github.com/celery/celery/tree/3.1/extra/generic-init.d + +AMQP result backend +~~~~~~~~~~~~~~~~~~~ + +The 3.1 release accidentally left the amqp backend configured to be +non-persistent by default. + +Upgrading from 3.0 would give a "not equivalent" error when attempting to +set or retrieve results for a task. 
That is unless you manually set the +persistence setting:: + + CELERY_RESULT_PERSISTENT = True + +This version restores the previous value so if you already forced +the upgrade by removing the existing exchange you must either +keep the configuration by setting ``CELERY_RESULT_PERSISTENT = False`` +or delete the ``celeryresults`` exchange again. + +Synchronous subtasks +~~~~~~~~~~~~~~~~~~~~ + +Tasks waiting for the result of a subtask will now emit +a :exc:`RuntimeWarning` warning when using the prefork pool, +and in 3.2 this will result in an exception being raised. + +It's not legal for tasks to block by waiting for subtasks +as this is likely to lead to resource starvation and eventually +deadlock when using the prefork pool (see also :ref:`task-synchronous-subtasks`). + +If you really know what you are doing you can avoid the warning (and +the future exception being raised) by moving the operation in a whitelist +block: + +.. code-block:: python + + from celery.result import allow_join_result + + @app.task + def misbehaving(): + result = other_task.delay() + with allow_join_result(): + result.get() + +Note also that if you wait for the result of a subtask in any form +when using the prefork pool you must also disable the pool prefetching +behavior with the worker :ref:`-Ofair option `. + +.. _v317-fixes: + +Fixes +----- + +- Now depends on :ref:`Kombu 3.0.8 `. + +- Now depends on :mod:`billiard` 3.3.0.13 + +- Events: Fixed compatibility with non-standard json libraries + that sends float as :class:`decimal.Decimal` (Issue #1731) + +- Events: State worker objects now always defines attributes: + ``active``, ``processed``, ``loadavg``, ``sw_ident``, ``sw_ver`` + and ``sw_sys``. + +- Worker: Now keeps count of the total number of tasks processed, + not just by type (``all_active_count``). + +- Init scripts: Fixed problem with reading configuration file + when the init script is symlinked to a runlevel (e.g. ``S02celeryd``). + (Issue #1740). + + This also removed a rarely used feature where you can symlink the script + to provide alternative configurations. You instead copy the script + and give it a new name, but perhaps a better solution is to provide + arguments to ``CELERYD_OPTS`` to separate them: + + .. code-block:: bash + + CELERYD_NODES="X1 X2 Y1 Y2" + CELERYD_OPTS="-A:X1 x -A:X2 x -A:Y1 y -A:Y2 y" + +- Fallback chord unlock task is now always called after the chord header + (Issue #1700). + + This means that the unlock task will not be started if there's + an error sending the header. + +- Celery command: Fixed problem with arguments for some control commands. + + Fix contributed by Konstantin Podshumok. + +- Fixed bug in ``utcoffset`` where the offset when in DST would be + completely wrong (Issue #1743). + +- Worker: Errors occurring while attempting to serialize the result of a + task will now cause the task to be marked with failure and a + :class:`kombu.exceptions.EncodingError` error. + + Fix contributed by Ionel Cristian Mărieș. + +- Worker with ``-B`` argument did not properly shut down the beat instance. + +- Worker: The ``%n`` and ``%h`` formats are now also supported by the + :option:`--logfile`, :option:`--pidfile` and :option:`--statedb` arguments. + + Example: + + .. code-block:: bash + + $ celery -A proj worker -n foo@%h --logfile=%n.log --statedb=%n.db + +- Redis/Cache result backends: Will now timeout if keys evicted while trying + to join a chord. 
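A small sketch of the events ``State`` attributes noted above in this list,
assuming a configured ``app`` and that events are captured elsewhere:

.. code-block:: python

    # Every worker entry now reliably exposes these attributes; the
    # values may simply be None until a heartbeat has delivered them.
    state = app.events.State()
    for hostname, worker in state.workers.items():
        print(hostname, worker.active, worker.processed, worker.loadavg)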
+ +- The fallbock unlock chord task now raises :exc:`Retry` so that the + retry even is properly logged by the worker. + +- Multi: Will no longer apply Eventlet/gevent monkey patches (Issue #1717). + +- Redis result backend: Now supports UNIX sockets. + + Like the Redis broker transport the result backend now also supports + using ``redis+socket:///tmp/redis.sock`` URLs. + + Contributed by Alcides Viamontes Esquivel. + +- Events: Events sent by clients was mistaken for worker related events + (Issue #1714). + + For ``events.State`` the tasks now have a ``Task.client`` attribute + that is set when a ``task-sent`` event is being received. + + Also, a clients logical clock is not in sync with the cluster so + they live in a "time bubble". So for this reason monitors will no + longer attempt to merge with the clock of an event sent by a client, + instead it will fake the value by using the current clock with + a skew of -1. + +- Prefork pool: The method used to find terminated processes was flawed + in that it did not also take into account missing popen objects. + +- Canvas: ``group`` and ``chord`` now works with anon signatures as long + as the group/chord object is associated with an app instance (Issue #1744). + + You can pass the app by using ``group(..., app=app)``. + +.. _version-3.1.6: + +3.1.6 +===== +:release-date: 2013-12-02 06:00 P.M UTC +:release-by: Ask Solem + +- Now depends on :mod:`billiard` 3.3.0.10. + +- Now depends on :ref:`Kombu 3.0.7 `. + +- Fixed problem where Mingle caused the worker to hang at startup + (Issue #1686). + +- Beat: Would attempt to drop privileges twice (Issue #1708). + +- Windows: Fixed error with ``geteuid`` not being available (Issue #1676). + +- Tasks can now provide a list of expected error classes (Issue #1682). + + The list should only include errors that the task is expected to raise + during normal operation:: + + @task(throws=(KeyError, HttpNotFound)) + + What happens when an exceptions is raised depends on the type of error: + + - Expected errors (included in ``Task.throws``) + + Will be logged using severity ``INFO``, and traceback is excluded. + + - Unexpected errors + + Will be logged using severity ``ERROR``, with traceback included. + +- Cache result backend now compatible with Python 3 (Issue #1697). + +- CentOS init script: Now compatible with sys-v style init symlinks. + + Fix contributed by Jonathan Jordan. + +- Events: Fixed problem when task name is not defined (Issue #1710). + + Fix contributed by Mher Movsisyan. + +- Task: Fixed unbound local errors (Issue #1684). + + Fix contributed by Markus Ullmann. + +- Canvas: Now unrolls groups with only one task (optimization) (Issue #1656). + +- Task: Fixed problem with eta and timezones. + + Fix contributed by Alexander Koval. + +- Django: Worker now performs model validation (Issue #1681). + +- Task decorator now emits less confusing errors when used with + incorrect arguments (Issue #1692). + +- Task: New method ``Task.send_event`` can be used to send custom events + to Flower and other monitors. + +- Fixed a compatibility issue with non-abstract task classes + +- Events from clients now uses new node name format (``gen@``). + +- Fixed rare bug with Callable not being defined at interpreter shutdown + (Issue #1678). + + Fix contributed by Nick Johnson. + +- Fixed Python 2.6 compatibility (Issue #1679). + +.. _version-3.1.5: + +3.1.5 +===== +:release-date: 2013-11-21 06:20 P.M UTC +:release-by: Ask Solem + +- Now depends on :ref:`Kombu 3.0.6 `. 
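Stepping back to the ``Task.send_event`` addition noted under 3.1.6 above, a
small sketch assuming an ``app`` instance; the event name and fields are
invented for the example:

.. code-block:: python

    @app.task(bind=True)
    def import_feed(self, url):
        # Extra keyword arguments become fields of the custom event,
        # which monitors such as Flower can subscribe to.
        self.send_event('task-progress', current=50, total=100)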
+ +- Now depends on :mod:`billiard` 3.3.0.8 + +- App: ``config_from_object`` is now lazy (Issue #1665). + +- App: ``autodiscover_tasks`` is now lazy. + + Django users should now wrap access to the settings object + in a lambda:: + + app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) + + this ensures that the settings object is not prepared + prematurely. + +- Fixed regression for ``--app`` argument experienced by + some users (Issue #1653). + +- Worker: Now respects the ``--uid`` and ``--gid`` arguments + even if ``--detach`` is not enabled. + +- Beat: Now respects the ``--uid`` and ``--gid`` arguments + even if ``--detach`` is not enabled. + +- Python 3: Fixed unorderable error occuring with the worker ``-B`` + argument enabled. + +- ``celery.VERSION`` is now a named tuple. + +- ``maybe_signature(list)`` is now applied recursively (Issue #1645). + +- ``celery shell`` command: Fixed ``IPython.frontend`` deprecation warning. + +- The default app no longer includes the builtin fixups. + + This fixes a bug where ``celery multi`` would attempt + to load the Django settings module before entering + the target working directory. + +- The Django daemonization tutorial was changed. + + Users no longer have to explicitly export ``DJANGO_SETTINGS_MODULE`` + in :file:`/etc/default/celeryd` when the new project layout is used. + +- Redis result backend: expiry value can now be 0 (Issue #1661). + +- Censoring settings now accounts for non-string keys (Issue #1663). + +- App: New ``autofinalize`` option. + + Apps are automatically finalized when the task registry is accessed. + You can now disable this behavior so that an exception is raised + instead. + + Example: + + .. code-block:: python + + app = Celery(autofinalize=False) + + # raises RuntimeError + tasks = app.tasks + + @app.task + def add(x, y): + return x + y + + # raises RuntimeError + add.delay(2, 2) + + app.finalize() + # no longer raises: + tasks = app.tasks + add.delay(2, 2) + +- The worker did not send monitoring events during shutdown. + +- Worker: Mingle and gossip is now automatically disabled when + used with an unsupported transport (Issue #1664). + +- ``celery`` command: Preload options now supports + the rare ``--opt value`` format (Issue #1668). + +- ``celery`` command: Accidentally removed options + appearing before the subcommand, these are now moved to the end + instead. + +- Worker now properly responds to ``inspect stats`` commands + even if received before startup is complete (Issue #1659). + +- :signal:`task_postrun` is now sent within a finally block, to make + sure the signal is always sent. + +- Beat: Fixed syntax error in string formatting. + + Contributed by nadad. + +- Fixed typos in the documentation. + + Fixes contributed by Loic Bistuer, sunfinite. + +- Nested chains now works properly when constructed using the + ``chain`` type instead of the ``|`` operator (Issue #1656). + +.. _version-3.1.4: + +3.1.4 +===== +:release-date: 2013-11-15 11:40 P.M UTC +:release-by: Ask Solem + +- Now depends on :ref:`Kombu 3.0.5 `. + +- Now depends on :mod:`billiard` 3.3.0.7 + +- Worker accidentally set a default socket timeout of 5 seconds. + +- Django: Fixup now sets the default app so that threads will use + the same app instance (e.g. for manage.py runserver). + +- Worker: Fixed Unicode error crash at startup experienced by some users. + +- Calling ``.apply_async`` on an empty chain now works again (Issue #1650). + +- The ``celery multi show`` command now generates the same arguments + as the start command does. 
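A brief sketch of the nested-chain construction mentioned above
(Issue #1656); ``add`` is assumed to be an existing task:

.. code-block:: python

    from celery import chain

    # Both spellings now build the same nested chain; the explicit
    # ``chain`` type behaves like the ``|`` operator form.
    explicit = chain(add.s(2, 2), chain(add.s(4), add.s(8)))
    with_pipe = add.s(2, 2) | (add.s(4) | add.s(8))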
+ +- The ``--app`` argument could end up using a module object instead + of an app instance (with a resulting crash). + +- Fixed a syntax error problem in the celerybeat init script. + + Fix contributed by Vsevolod. + +- Tests now passing on PyPy 2.1 and 2.2. + +.. _version-3.1.3: + +3.1.3 +===== +:release-date: 2013-11-13 00:55 A.M UTC +:release-by: Ask Solem + +- Fixed compatibility problem with Python 2.7.0 - 2.7.5 (Issue #1637) + + ``unpack_from`` started supporting ``memoryview`` arguments + in Python 2.7.6. + +- Worker: :option:`-B` argument accidentally closed files used + for logging. + +- Task decorated tasks now keep their docstring (Issue #1636) + +.. _version-3.1.2: + +3.1.2 +===== +:release-date: 2013-11-12 08:00 P.M UTC +:release-by: Ask Solem + +- Now depends on :mod:`billiard` 3.3.0.6 + +- No longer needs the billiard C extension to be installed. + +- The worker silently ignored task errors. + +- Django: Fixed ``ImproperlyConfigured`` error raised + when no database backend specified. + + Fix contributed by j0hnsmith + +- Prefork pool: Now using ``_multiprocessing.read`` with ``memoryview`` + if available. + +- ``close_open_fds`` now uses ``os.closerange`` if available. + +- ``get_fdmax`` now takes value from ``sysconfig`` if possible. + +.. _version-3.1.1: + +3.1.1 +===== +:release-date: 2013-11-11 06:30 P.M UTC +:release-by: Ask Solem + +- Now depends on :mod:`billiard` 3.3.0.4. + +- Python 3: Fixed compatibility issues. + +- Windows: Accidentally showed warning that the billiard C extension + was not installed (Issue #1630). + +- Django: Tutorial updated with a solution that sets a default + :envvar:`DJANGO_SETTINGS_MODULE` so that it doesn't have to be typed + in with the :program:`celery` command. + + Also fixed typos in the tutorial, and added the settings + required to use the Django database backend. + + Thanks to Chris Ward, orarbel. + +- Django: Fixed a problem when using the Django settings in Django 1.6. + +- Django: Fixup should not be applied if the django loader is active. + +- Worker: Fixed attribute error for ``human_write_stats`` when using the + compatibility prefork pool implementation. + +- Worker: Fixed compatibility with billiard without C extension. + +- Inspect.conf: Now supports a ``with_defaults`` argument. + +- Group.restore: The backend argument was not respected. + +.. _version-3.1.0: + +3.1.0 +======= +:release-date: 2013-11-09 11:00 P.M UTC +:release-by: Ask Solem + +See :ref:`whatsnew-3.1`. diff --git a/docs/community.rst b/docs/community.rst new file mode 100644 index 0000000..437fac6 --- /dev/null +++ b/docs/community.rst @@ -0,0 +1,55 @@ +.. _community: + +======================= +Community Resources +======================= + +This is a list of external blog posts, tutorials and slides related +to Celery. If you have a link that's missing from this list, please +contact the mailing-list or submit a patch. + +.. contents:: + :local: + +.. _community-resources: + +Resources +========= + +.. _res-using-celery: + +Who's using Celery +------------------ + +http://wiki.github.com/celery/celery/using + +.. _res-wiki: + +Wiki +---- + +http://wiki.github.com/celery/celery/ + +.. _res-stackoverflow: + +Celery questions on Stack Overflow +---------------------------------- + +http://stackoverflow.com/search?q=celery&tab=newest + +.. _res-mailing-list-archive: + +Mailing-list Archive: celery-users +---------------------------------- + +http://blog.gmane.org/gmane.comp.python.amqp.celery.user + +.. _res-irc-logs: + +.. 
_community-news: + +News +==== + +This section has moved to the Celery homepage: +http://celeryproject.org/community/ diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..caa3215 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- + +import sys +import os + +this = os.path.dirname(os.path.abspath(__file__)) + +# If your extensions are in another directory, add it here. If the directory +# is relative to the documentation root, use os.path.abspath to make it +# absolute, like shown here. +sys.path.insert(0, os.path.join(this, os.pardir)) +sys.path.append(os.path.join(this, '_ext')) +import celery + +# General configuration +# --------------------- + +extensions = ['sphinx.ext.autodoc', + 'sphinx.ext.coverage', + 'sphinx.ext.pngmath', + 'sphinx.ext.viewcode', + 'sphinx.ext.coverage', + 'sphinx.ext.intersphinx', + 'celery.contrib.sphinx', + 'githubsphinx', + 'celerydocs'] + + +LINKCODE_URL = 'http://github.com/{proj}/tree/{branch}/{filename}.py' +GITHUB_PROJECT = 'celery/celery' +GITHUB_BRANCH = 'master' + + +def linkcode_resolve(domain, info): + if domain != 'py' or not info['module']: + return + filename = info['module'].replace('.', '/') + return LINKCODE_URL.format( + proj=GITHUB_PROJECT, + branch=GITHUB_BRANCH, + filename=filename, + ) + +html_show_sphinx = False + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['.templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'Celery' +copyright = '2009-2014, Ask Solem & Contributors' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '.'.join(map(str, celery.VERSION[0:2])) +# The full version, including alpha/beta/rc tags. +release = celery.__version__ + +exclude_trees = ['.build'] + +#unused_docs = [ +# 'xreftest.rst', +# 'tutorials/otherqueues', +#] + +# If true, '()' will be appended to :func: etc. cross-reference text. +add_function_parentheses = True + +def check_object_path(key, url, path): + if os.path.isfile(path): + return {key: (url, path)} + return {} + +intersphinx_mapping = {} +intersphinx_mapping.update(check_object_path('python', + 'http://docs.python.org/', + '/usr/share/doc/python' + + '.'.join([str(x) for x in sys.version_info[0:2]]) + + '/html/objects.inv')) +intersphinx_mapping.update(check_object_path('kombu', + 'http://kombu.readthedocs.org/en/latest/', + '/usr/share/doc/python-kombu-doc/html/objects.inv')) +intersphinx_mapping.update(check_object_path('amqp', + 'http://amqp.readthedocs.org/en/latest/', + '/usr/share/doc/python-amqp-doc/html/objects.inv')) + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'colorful' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['.static'] + +html_use_smartypants = True + +add_module_names = True +highlight_language = 'python3' + +# If false, no module index is generated. +html_use_modindex = True + +# If false, no index is generated. 
+html_use_index = True + +latex_documents = [ + ('index', 'Celery.tex', 'Celery Documentation', + 'Ask Solem & Contributors', 'manual'), +] + +html_theme = 'celery' +html_theme_path = ['_theme'] +html_sidebars = { + 'index': ['sidebarintro.html', 'sourcelink.html', 'searchbox.html'], + '**': ['sidebarlogo.html', 'relations.html', + 'sourcelink.html', 'searchbox.html'], +} + +### Issuetracker + +github_project = 'celery/celery' + +# -- Options for Epub output ------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = 'Celery Manual, Version {0}'.format(version) +epub_author = 'Ask Solem' +epub_publisher = 'Celery Project' +epub_copyright = '2009-2014' + +# The language of the text. It defaults to the language option +# or en if the language is not set. +epub_language = 'en' + +# The scheme of the identifier. Typical schemes are ISBN or URL. +epub_scheme = 'ISBN' + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +epub_identifier = 'celeryproject.org' + +# A unique identification for the text. +epub_uid = 'Celery Manual, Version {0}'.format(version) + +# HTML files that should be inserted before the pages created by sphinx. +# The format is a list of tuples containing the path and title. +#epub_pre_files = [] + +# HTML files shat should be inserted after the pages created by sphinx. +# The format is a list of tuples containing the path and title. +#epub_post_files = [] + +# A list of files that should not be packed into the epub file. +epub_exclude_files = ['search.html'] + + +# The depth of the table of contents in toc.ncx. +epub_tocdepth = 3 diff --git a/docs/configuration.rst b/docs/configuration.rst new file mode 100644 index 0000000..3f787f2 --- /dev/null +++ b/docs/configuration.rst @@ -0,0 +1,1861 @@ +.. _configuration: + +============================ + Configuration and defaults +============================ + +This document describes the configuration options available. + +If you're using the default loader, you must create the :file:`celeryconfig.py` +module and make sure it is available on the Python path. + +.. contents:: + :local: + :depth: 2 + +.. _conf-example: + +Example configuration file +========================== + +This is an example configuration file to get you started. +It should contain all you need to run a basic Celery set-up. + +.. code-block:: python + + ## Broker settings. + BROKER_URL = 'amqp://guest:guest@localhost:5672//' + + # List of modules to import when celery starts. + CELERY_IMPORTS = ('myapp.tasks', ) + + ## Using the database to store task state and results. + CELERY_RESULT_BACKEND = 'db+sqlite:///results.db' + + CELERY_ANNOTATIONS = {'tasks.add': {'rate_limit': '10/s'}} + + +Configuration Directives +======================== + +.. _conf-datetime: + +Time and date settings +---------------------- + +.. setting:: CELERY_ENABLE_UTC + +CELERY_ENABLE_UTC +~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.5 + +If enabled dates and times in messages will be converted to use +the UTC timezone. + +Note that workers running Celery versions below 2.5 will assume a local +timezone for all messages, so only enable if all workers have been +upgraded. + +Enabled by default since version 3.0. + +.. setting:: CELERY_TIMEZONE + +CELERY_TIMEZONE +~~~~~~~~~~~~~~~ + +Configure Celery to use a custom time zone. +The timezone value can be any time zone supported by the `pytz`_ +library. + +If not set the UTC timezone is used. 
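+
+For example, a minimal setting (``'Europe/London'`` is only an illustrative
+zone name; any name known to the `pytz`_ library should work):
+
+.. code-block:: python
+
+    CELERY_TIMEZONE = 'Europe/London'
+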
For backwards compatibility +there is also a :setting:`CELERY_ENABLE_UTC` setting, and this is set +to false the system local timezone is used instead. + +.. _`pytz`: http://pypi.python.org/pypi/pytz/ + + + +.. _conf-tasks: + +Task settings +------------- + +.. setting:: CELERY_ANNOTATIONS + +CELERY_ANNOTATIONS +~~~~~~~~~~~~~~~~~~ + +This setting can be used to rewrite any task attribute from the +configuration. The setting can be a dict, or a list of annotation +objects that filter for tasks and return a map of attributes +to change. + + +This will change the ``rate_limit`` attribute for the ``tasks.add`` +task: + +.. code-block:: python + + CELERY_ANNOTATIONS = {'tasks.add': {'rate_limit': '10/s'}} + +or change the same for all tasks: + +.. code-block:: python + + CELERY_ANNOTATIONS = {'*': {'rate_limit': '10/s'}} + + +You can change methods too, for example the ``on_failure`` handler: + +.. code-block:: python + + def my_on_failure(self, exc, task_id, args, kwargs, einfo): + print('Oh no! Task failed: {0!r}'.format(exc)) + + CELERY_ANNOTATIONS = {'*': {'on_failure': my_on_failure}} + + +If you need more flexibility then you can use objects +instead of a dict to choose which tasks to annotate: + +.. code-block:: python + + class MyAnnotate(object): + + def annotate(self, task): + if task.name.startswith('tasks.'): + return {'rate_limit': '10/s'} + + CELERY_ANNOTATIONS = (MyAnnotate(), {…}) + + + +.. _conf-concurrency: + +Concurrency settings +-------------------- + +.. setting:: CELERYD_CONCURRENCY + +CELERYD_CONCURRENCY +~~~~~~~~~~~~~~~~~~~ + +The number of concurrent worker processes/threads/green threads executing +tasks. + +If you're doing mostly I/O you can have more processes, +but if mostly CPU-bound, try to keep it close to the +number of CPUs on your machine. If not set, the number of CPUs/cores +on the host will be used. + +Defaults to the number of available CPUs. + +.. setting:: CELERYD_PREFETCH_MULTIPLIER + +CELERYD_PREFETCH_MULTIPLIER +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +How many messages to prefetch at a time multiplied by the number of +concurrent processes. The default is 4 (four messages for each +process). The default setting is usually a good choice, however -- if you +have very long running tasks waiting in the queue and you have to start the +workers, note that the first worker to start will receive four times the +number of messages initially. Thus the tasks may not be fairly distributed +to the workers. + +.. note:: + + Tasks with ETA/countdown are not affected by prefetch limits. + +.. _conf-result-backend: + +Task result backend settings +---------------------------- + +.. setting:: CELERY_RESULT_BACKEND + +CELERY_RESULT_BACKEND +~~~~~~~~~~~~~~~~~~~~~ +:Deprecated aliases: ``CELERY_BACKEND`` + +The backend used to store task results (tombstones). +Disabled by default. +Can be one of the following: + +* database + Use a relational database supported by `SQLAlchemy`_. + See :ref:`conf-database-result-backend`. + +* cache + Use `memcached`_ to store the results. + See :ref:`conf-cache-result-backend`. + +* mongodb + Use `MongoDB`_ to store the results. + See :ref:`conf-mongodb-result-backend`. + +* redis + Use `Redis`_ to store the results. + See :ref:`conf-redis-result-backend`. + +* amqp + Send results back as AMQP messages + See :ref:`conf-amqp-result-backend`. + +* cassandra + Use `Cassandra`_ to store the results. + See :ref:`conf-cassandra-result-backend`. + +* ironcache + Use `IronCache`_ to store the results. + See :ref:`conf-ironcache-result-backend`. 
+
+* couchbase
+    Use `Couchbase`_ to store the results.
+    See :ref:`conf-couchbase-result-backend`.
+
+.. warning::
+
+    While the AMQP result backend is very efficient, you must make sure
+    you only receive the same result once.  See :doc:`userguide/calling`.
+
+.. _`SQLAlchemy`: http://sqlalchemy.org
+.. _`memcached`: http://memcached.org
+.. _`MongoDB`: http://mongodb.org
+.. _`Redis`: http://redis.io
+.. _`Cassandra`: http://cassandra.apache.org/
+.. _`IronCache`: http://www.iron.io/cache
+.. _`Couchbase`: http://www.couchbase.com/
+
+
+.. setting:: CELERY_RESULT_SERIALIZER
+
+CELERY_RESULT_SERIALIZER
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Result serialization format.  Default is ``pickle``.  See
+:ref:`calling-serializers` for information about supported
+serialization formats.
+
+.. _conf-database-result-backend:
+
+Database backend settings
+-------------------------
+
+Database URL Examples
+~~~~~~~~~~~~~~~~~~~~~
+
+To use the database backend you have to configure the
+:setting:`CELERY_RESULT_BACKEND` setting with a connection URL and the ``db+``
+prefix:
+
+.. code-block:: python
+
+    CELERY_RESULT_BACKEND = 'db+scheme://user:password@host:port/dbname'
+
+Examples::
+
+    # sqlite (filename)
+    CELERY_RESULT_BACKEND = 'db+sqlite:///results.sqlite'
+
+    # mysql
+    CELERY_RESULT_BACKEND = 'db+mysql://scott:tiger@localhost/foo'
+
+    # postgresql
+    CELERY_RESULT_BACKEND = 'db+postgresql://scott:tiger@localhost/mydatabase'
+
+    # oracle
+    CELERY_RESULT_BACKEND = 'db+oracle://scott:tiger@127.0.0.1:1521/sidname'
+
+Please see `Supported Databases`_ for a table of supported databases,
+and `Connection String`_ for more information about connection
+strings (which is the part of the URI that comes after the ``db+`` prefix).
+
+.. _`Supported Databases`:
+    http://www.sqlalchemy.org/docs/core/engines.html#supported-databases
+
+.. _`Connection String`:
+    http://www.sqlalchemy.org/docs/core/engines.html#database-urls
+
+.. setting:: CELERY_RESULT_DBURI
+
+CELERY_RESULT_DBURI
+~~~~~~~~~~~~~~~~~~~
+
+This setting is no longer used as it's now possible to specify
+the database URL directly in the :setting:`CELERY_RESULT_BACKEND` setting.
+
+.. setting:: CELERY_RESULT_ENGINE_OPTIONS
+
+CELERY_RESULT_ENGINE_OPTIONS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To specify additional SQLAlchemy database engine options you can use
+the :setting:`CELERY_RESULT_ENGINE_OPTIONS` setting::
+
+    # echo enables verbose logging from SQLAlchemy.
+    CELERY_RESULT_ENGINE_OPTIONS = {'echo': True}
+
+
+.. setting:: CELERY_RESULT_DB_SHORT_LIVED_SESSIONS
+
+CELERY_RESULT_DB_SHORT_LIVED_SESSIONS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Short lived sessions are disabled by default.  If enabled they can drastically reduce
+performance, especially on systems processing lots of tasks.  This option is useful
+on low-traffic workers that experience errors as a result of cached database connections
+going stale through inactivity.  For example, intermittent errors like
+`(OperationalError) (2006, 'MySQL server has gone away')` can be fixed by enabling
+short lived sessions.  This option only affects the database backend.
+
+Example::
+
+    CELERY_RESULT_DB_SHORT_LIVED_SESSIONS = True
+
+Specifying Table Names
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. setting:: CELERY_RESULT_DB_TABLENAMES
+
+When SQLAlchemy is configured as the result backend, Celery automatically
+creates two tables to store result metadata for tasks.  This setting allows
+you to customize the table names:
+
+.. code-block:: python
+
+    # use custom table names for the database result backend.
+ CELERY_RESULT_DB_TABLENAMES = { + 'task': 'myapp_taskmeta', + 'group': 'myapp_groupmeta', + } + +.. _conf-amqp-result-backend: + +AMQP backend settings +--------------------- + +.. note:: + + The AMQP backend requires RabbitMQ 1.1.0 or higher to automatically + expire results. If you are running an older version of RabbitmQ + you should disable result expiration like this: + + CELERY_TASK_RESULT_EXPIRES = None + +.. setting:: CELERY_RESULT_EXCHANGE + +CELERY_RESULT_EXCHANGE +~~~~~~~~~~~~~~~~~~~~~~ + +Name of the exchange to publish results in. Default is `celeryresults`. + +.. setting:: CELERY_RESULT_EXCHANGE_TYPE + +CELERY_RESULT_EXCHANGE_TYPE +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The exchange type of the result exchange. Default is to use a `direct` +exchange. + +.. setting:: CELERY_RESULT_PERSISTENT + +CELERY_RESULT_PERSISTENT +~~~~~~~~~~~~~~~~~~~~~~~~ + +If set to :const:`True`, result messages will be persistent. This means the +messages will not be lost after a broker restart. The default is for the +results to be transient. + +Example configuration +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + CELERY_RESULT_BACKEND = 'amqp' + CELERY_TASK_RESULT_EXPIRES = 18000 # 5 hours. + +.. _conf-cache-result-backend: + +Cache backend settings +---------------------- + +.. note:: + + The cache backend supports the `pylibmc`_ and `python-memcached` + libraries. The latter is used only if `pylibmc`_ is not installed. + +Using a single memcached server: + +.. code-block:: python + + CELERY_RESULT_BACKEND = 'cache+memcached://127.0.0.1:11211/' + +Using multiple memcached servers: + +.. code-block:: python + + CELERY_RESULT_BACKEND = """ + cache+memcached://172.19.26.240:11211;172.19.26.242:11211/ + """.strip() + +.. setting:: CELERY_CACHE_BACKEND_OPTIONS + +The "memory" backend stores the cache in memory only: + + CELERY_CACHE_BACKEND = 'memory' + +CELERY_CACHE_BACKEND_OPTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can set pylibmc options using the :setting:`CELERY_CACHE_BACKEND_OPTIONS` +setting: + +.. code-block:: python + + CELERY_CACHE_BACKEND_OPTIONS = {'binary': True, + 'behaviors': {'tcp_nodelay': True}} + +.. _`pylibmc`: http://sendapatch.se/projects/pylibmc/ + +.. setting:: CELERY_CACHE_BACKEND + +CELERY_CACHE_BACKEND +~~~~~~~~~~~~~~~~~~~~ + +This setting is no longer used as it's now possible to specify +the cache backend directly in the :setting:`CELERY_RESULT_BACKEND` setting. + +.. _conf-redis-result-backend: + +Redis backend settings +---------------------- + +Configuring the backend URL +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + The Redis backend requires the :mod:`redis` library: + http://pypi.python.org/pypi/redis/ + + To install the redis package use `pip` or `easy_install`: + + .. code-block:: bash + + $ pip install redis + +This backend requires the :setting:`CELERY_RESULT_BACKEND` +setting to be set to a Redis URL:: + + CELERY_RESULT_BACKEND = 'redis://:password@host:port/db' + +For example:: + + CELERY_RESULT_BACKEND = 'redis://localhost/0' + +which is the same as:: + + CELERY_RESULT_BACKEND = 'redis://' + +The fields of the URL is defined as folows: + +- *host* + +Host name or IP address of the Redis server. e.g. `localhost`. + +- *port* + +Port to the Redis server. Default is 6379. + +- *db* + +Database number to use. Default is 0. +The db can include an optional leading slash. + +- *password* + +Password used to connect to the database. + +.. 
setting:: CELERY_REDIS_MAX_CONNECTIONS + +CELERY_REDIS_MAX_CONNECTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Maximum number of connections available in the Redis connection +pool used for sending and retrieving results. + +.. _conf-mongodb-result-backend: + +MongoDB backend settings +------------------------ + +.. note:: + + The MongoDB backend requires the :mod:`pymongo` library: + http://github.com/mongodb/mongo-python-driver/tree/master + +.. setting:: CELERY_MONGODB_BACKEND_SETTINGS + +CELERY_MONGODB_BACKEND_SETTINGS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a dict supporting the following keys: + +* database + The database name to connect to. Defaults to ``celery``. + +* taskmeta_collection + The collection name to store task meta data. + Defaults to ``celery_taskmeta``. + +* max_pool_size + Passed as max_pool_size to PyMongo's Connection or MongoClient + constructor. It is the maximum number of TCP connections to keep + open to MongoDB at a given time. If there are more open connections + than max_pool_size, sockets will be closed when they are released. + Defaults to 10. + +* options + + Additional keyword arguments to pass to the mongodb connection + constructor. See the :mod:`pymongo` docs to see a list of arguments + supported. + +.. _example-mongodb-result-config: + +Example configuration +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + CELERY_RESULT_BACKEND = 'mongodb://192.168.1.100:30000/' + CELERY_MONGODB_BACKEND_SETTINGS = { + 'database': 'mydb', + 'taskmeta_collection': 'my_taskmeta_collection', + } + +.. _conf-cassandra-result-backend: + +Cassandra backend settings +-------------------------- + +.. note:: + + The Cassandra backend requires the :mod:`pycassa` library: + http://pypi.python.org/pypi/pycassa/ + + To install the pycassa package use `pip` or `easy_install`: + + .. code-block:: bash + + $ pip install pycassa + +This backend requires the following configuration directives to be set. + +.. setting:: CASSANDRA_SERVERS + +CASSANDRA_SERVERS +~~~~~~~~~~~~~~~~~ + +List of ``host:port`` Cassandra servers. e.g.:: + + CASSANDRA_SERVERS = ['localhost:9160'] + +.. setting:: CASSANDRA_KEYSPACE + +CASSANDRA_KEYSPACE +~~~~~~~~~~~~~~~~~~ + +The keyspace in which to store the results. e.g.:: + + CASSANDRA_KEYSPACE = 'tasks_keyspace' + +.. setting:: CASSANDRA_COLUMN_FAMILY + +CASSANDRA_COLUMN_FAMILY +~~~~~~~~~~~~~~~~~~~~~~~ + +The column family in which to store the results. e.g.:: + + CASSANDRA_COLUMN_FAMILY = 'tasks' + +.. setting:: CASSANDRA_READ_CONSISTENCY + +CASSANDRA_READ_CONSISTENCY +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The read consistency used. Values can be ``ONE``, ``QUORUM`` or ``ALL``. + +.. setting:: CASSANDRA_WRITE_CONSISTENCY + +CASSANDRA_WRITE_CONSISTENCY +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The write consistency used. Values can be ``ONE``, ``QUORUM`` or ``ALL``. + +.. setting:: CASSANDRA_DETAILED_MODE + +CASSANDRA_DETAILED_MODE +~~~~~~~~~~~~~~~~~~~~~~~ + +Enable or disable detailed mode. Default is :const:`False`. +This mode allows to use the power of Cassandra wide columns to +store all states for a task as a wide column, instead of only the last one. + +To use this mode, you need to configure your ColumnFamily to +use the ``TimeUUID`` type as a comparator:: + + create column family task_results with comparator = TimeUUIDType; + +CASSANDRA_OPTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Options to be passed to the `pycassa connection pool`_ (optional). + +.. 
_`pycassa connection pool`: http://pycassa.github.com/pycassa/api/pycassa/pool.html + +Example configuration +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + CASSANDRA_SERVERS = ['localhost:9160'] + CASSANDRA_KEYSPACE = 'celery' + CASSANDRA_COLUMN_FAMILY = 'task_results' + CASSANDRA_READ_CONSISTENCY = 'ONE' + CASSANDRA_WRITE_CONSISTENCY = 'ONE' + CASSANDRA_DETAILED_MODE = True + CASSANDRA_OPTIONS = { + 'timeout': 300, + 'max_retries': 10 + } + + +.. _conf-ironcache-result-backend: + +IronCache backend settings +-------------------------- + +.. note:: + + The IronCache backend requires the :mod:`iron_celery` library: + http://pypi.python.org/pypi/iron_celery + + To install the iron_celery package use `pip` or `easy_install`: + + .. code-block:: bash + + $ pip install iron_celery + +IronCache is configured via the URL provided in :setting:`CELERY_RESULT_BACKEND`, for example:: + + CELERY_RESULT_BACKEND = 'ironcache://project_id:token@' + +Or to change the cache name:: + + ironcache:://project_id:token@/awesomecache + +For more information, see: https://github.com/iron-io/iron_celery + + +.. _conf-couchbase-result-backend: + +Couchbase backend settings +-------------------------- + +.. note:: + + The Couchbase backend requires the :mod:`couchbase` library: + https://pypi.python.org/pypi/couchbase + + To install the couchbase package use `pip` or `easy_install`: + + .. code-block:: bash + + $ pip install couchbase + +This backend can be configured via the :setting:`CELERY_RESULT_BACKEND` +set to a couchbase URL:: + + CELERY_RESULT_BACKEND = 'couchbase://username:password@host:port/bucket' + + +.. setting:: CELERY_COUCHBASE_BACKEND_SETTINGS + +CELERY_COUCHBASE_BACKEND_SETTINGS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a dict supporting the following keys: + +* host + Host name of the Couchbase server. Defaults to ``localhost``. + +* port + The port the Couchbase server is listening to. Defaults to ``8091``. + +* bucket + The default bucket the Couchbase server is writing to. + Defaults to ``default``. + +* username + User name to authenticate to the Couchbase server as (optional). + +* password + Password to authenticate to the Couchbase server (optional). + + +.. _conf-messaging: + +Message Routing +--------------- + +.. _conf-messaging-routing: + +.. setting:: CELERY_QUEUES + +CELERY_QUEUES +~~~~~~~~~~~~~ + +The mapping of queues the worker consumes from. This is a dictionary +of queue name/options. See :ref:`guide-routing` for more information. + +The default is a queue/exchange/binding key of ``celery``, with +exchange type ``direct``. + +You don't have to care about this unless you want custom routing facilities. + +.. setting:: CELERY_ROUTES + +CELERY_ROUTES +~~~~~~~~~~~~~ + +A list of routers, or a single router used to route tasks to queues. +When deciding the final destination of a task the routers are consulted +in order. See :ref:`routers` for more information. + +.. setting:: CELERY_QUEUE_HA_POLICY + +CELERY_QUEUE_HA_POLICY +~~~~~~~~~~~~~~~~~~~~~~ +:brokers: RabbitMQ + +This will set the default HA policy for a queue, and the value +can either be a string (usually ``all``): + +.. code-block:: python + + CELERY_QUEUE_HA_POLICY = 'all' + +Using 'all' will replicate the queue to all current nodes, +Or you can give it a list of nodes to replicate to: + +.. code-block:: python + + CELERY_QUEUE_HA_POLICY = ['rabbit@host1', 'rabbit@host2'] + + +Using a list will implicitly set ``x-ha-policy`` to 'nodes' and +``x-ha-policy-params`` to the given list of nodes. 
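+
+As a rough illustration only (the setting above does this for you), declaring
+a queue with the same arguments by hand would look something like this:
+
+.. code-block:: python
+
+    from kombu import Queue
+
+    # The policy ends up as the x-ha-policy queue argument at declaration time.
+    CELERY_QUEUES = (
+        Queue('celery', queue_arguments={'x-ha-policy': 'all'}),
+    )
+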
+ +See http://www.rabbitmq.com/ha.html for more information. + +.. setting:: CELERY_WORKER_DIRECT + +CELERY_WORKER_DIRECT +~~~~~~~~~~~~~~~~~~~~ + +This option enables so that every worker has a dedicated queue, +so that tasks can be routed to specific workers. + +The queue name for each worker is automatically generated based on +the worker hostname and a ``.dq`` suffix, using the ``C.dq`` exchange. + +For example the queue name for the worker with node name ``w1@example.com`` +becomes:: + + w1@example.com.dq + +Then you can route the task to the task by specifying the hostname +as the routing key and the ``C.dq`` exchange:: + + CELERY_ROUTES = { + 'tasks.add': {'exchange': 'C.dq', 'routing_key': 'w1@example.com'} + } + +.. setting:: CELERY_CREATE_MISSING_QUEUES + +CELERY_CREATE_MISSING_QUEUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If enabled (default), any queues specified that are not defined in +:setting:`CELERY_QUEUES` will be automatically created. See +:ref:`routing-automatic`. + +.. setting:: CELERY_DEFAULT_QUEUE + +CELERY_DEFAULT_QUEUE +~~~~~~~~~~~~~~~~~~~~ + +The name of the default queue used by `.apply_async` if the message has +no route or no custom queue has been specified. + + +This queue must be listed in :setting:`CELERY_QUEUES`. +If :setting:`CELERY_QUEUES` is not specified then it is automatically +created containing one queue entry, where this name is used as the name of +that queue. + +The default is: `celery`. + +.. seealso:: + + :ref:`routing-changing-default-queue` + +.. setting:: CELERY_DEFAULT_EXCHANGE + +CELERY_DEFAULT_EXCHANGE +~~~~~~~~~~~~~~~~~~~~~~~ + +Name of the default exchange to use when no custom exchange is +specified for a key in the :setting:`CELERY_QUEUES` setting. + +The default is: `celery`. + +.. setting:: CELERY_DEFAULT_EXCHANGE_TYPE + +CELERY_DEFAULT_EXCHANGE_TYPE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Default exchange type used when no custom exchange type is specified +for a key in the :setting:`CELERY_QUEUES` setting. +The default is: `direct`. + +.. setting:: CELERY_DEFAULT_ROUTING_KEY + +CELERY_DEFAULT_ROUTING_KEY +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The default routing key used when no custom routing key +is specified for a key in the :setting:`CELERY_QUEUES` setting. + +The default is: `celery`. + +.. setting:: CELERY_DEFAULT_DELIVERY_MODE + +CELERY_DEFAULT_DELIVERY_MODE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Can be `transient` or `persistent`. The default is to send +persistent messages. + +.. _conf-broker-settings: + +Broker Settings +--------------- + +.. setting:: CELERY_ACCEPT_CONTENT + +CELERY_ACCEPT_CONTENT +~~~~~~~~~~~~~~~~~~~~~ + +A whitelist of content-types/serializers to allow. + +If a message is received that is not in this list then +the message will be discarded with an error. + +By default any content type is enabled (including pickle and yaml) +so make sure untrusted parties do not have access to your broker. +See :ref:`guide-security` for more. + +Example:: + + # using serializer name + CELERY_ACCEPT_CONTENT = ['json'] + + # or the actual content-type (MIME) + CELERY_ACCEPT_CONTENT = ['application/json'] + +.. setting:: BROKER_FAILOVER_STRATEGY + +BROKER_FAILOVER_STRATEGY +~~~~~~~~~~~~~~~~~~~~~~~~ + +Default failover strategy for the broker Connection object. If supplied, +may map to a key in 'kombu.connection.failover_strategies', or be a reference +to any method that yields a single item from a supplied list. 
+
+Example::
+
+    # Random failover strategy
+    import random
+    from itertools import repeat
+
+    def random_failover_strategy(servers):
+        it = list(servers)  # don't modify caller's list
+        shuffle = random.shuffle
+        for _ in repeat(None):
+            shuffle(it)
+            yield it[0]
+
+    BROKER_FAILOVER_STRATEGY = random_failover_strategy
+
+.. setting:: BROKER_TRANSPORT
+
+BROKER_TRANSPORT
+~~~~~~~~~~~~~~~~
+:Aliases: ``BROKER_BACKEND``
+:Deprecated aliases: ``CARROT_BACKEND``
+
+.. setting:: BROKER_URL
+
+BROKER_URL
+~~~~~~~~~~
+
+Default broker URL.  This must be a URL in the form of::
+
+    transport://userid:password@hostname:port/virtual_host
+
+Only the scheme part (``transport://``) is required, the rest
+is optional, and defaults to the specific transport's default values.
+
+The transport part is the broker implementation to use, and the
+default is ``amqp``, which uses ``librabbitmq`` by default or falls back to
+``pyamqp`` if that is not installed.  Also there are many other choices including
+``redis``, ``beanstalk``, ``sqlalchemy``, ``django``, ``mongodb``,
+``couchdb``.
+It can also be a fully qualified path to your own transport implementation.
+
+See :ref:`kombu:connection-urls` in the Kombu documentation for more
+information.
+
+.. setting:: BROKER_HEARTBEAT
+
+BROKER_HEARTBEAT
+~~~~~~~~~~~~~~~~
+:transports supported: ``pyamqp``
+
+It's not always possible to detect connection loss in a timely
+manner using TCP/IP alone, so AMQP defines something called heartbeats
+that is used both by the client and the broker to detect if
+a connection was closed.
+
+Heartbeats are disabled by default.
+
+If the heartbeat value is 10 seconds, then
+the heartbeat will be monitored at the interval specified
+by the :setting:`BROKER_HEARTBEAT_CHECKRATE` setting, which by default is
+double the rate of the heartbeat value
+(so for the default 10 seconds, the heartbeat is checked every 5 seconds).
+
+.. setting:: BROKER_HEARTBEAT_CHECKRATE
+
+BROKER_HEARTBEAT_CHECKRATE
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+:transports supported: ``pyamqp``
+
+At intervals the worker will monitor that the broker has not missed
+too many heartbeats.  The rate at which this is checked is calculated
+by dividing the :setting:`BROKER_HEARTBEAT` value by this value,
+so if the heartbeat is 10.0 and the rate is the default 2.0, the check
+will be performed every 5 seconds (twice the heartbeat sending rate).
+
+.. setting:: BROKER_USE_SSL
+
+BROKER_USE_SSL
+~~~~~~~~~~~~~~
+
+Use SSL to connect to the broker.  Off by default.  This may not be supported
+by all transports.
+
+.. setting:: BROKER_POOL_LIMIT
+
+BROKER_POOL_LIMIT
+~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 2.3
+
+The maximum number of connections that can be open in the connection pool.
+
+The pool is enabled by default since version 2.5, with a default limit of ten
+connections.  This number can be tweaked depending on the number of
+threads/greenthreads (eventlet/gevent) using a connection.
For example +running eventlet with 1000 greenlets that use a connection to the broker, +contention can arise and you should consider increasing the limit. + +If set to :const:`None` or 0 the connection pool will be disabled and +connections will be established and closed for every use. + +Default (since 2.5) is to use a pool of 10 connections. + +.. setting:: BROKER_CONNECTION_TIMEOUT + +BROKER_CONNECTION_TIMEOUT +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The default timeout in seconds before we give up establishing a connection +to the AMQP server. Default is 4 seconds. + +.. setting:: BROKER_CONNECTION_RETRY + +BROKER_CONNECTION_RETRY +~~~~~~~~~~~~~~~~~~~~~~~ + +Automatically try to re-establish the connection to the AMQP broker if lost. + +The time between retries is increased for each retry, and is +not exhausted before :setting:`BROKER_CONNECTION_MAX_RETRIES` is +exceeded. + +This behavior is on by default. + +.. setting:: BROKER_CONNECTION_MAX_RETRIES + +BROKER_CONNECTION_MAX_RETRIES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Maximum number of retries before we give up re-establishing a connection +to the AMQP broker. + +If this is set to :const:`0` or :const:`None`, we will retry forever. + +Default is 100 retries. + +.. setting:: BROKER_LOGIN_METHOD + +BROKER_LOGIN_METHOD +~~~~~~~~~~~~~~~~~~~ + +Set custom amqp login method, default is ``AMQPLAIN``. + +.. setting:: BROKER_TRANSPORT_OPTIONS + +BROKER_TRANSPORT_OPTIONS +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.2 + +A dict of additional options passed to the underlying transport. + +See your transport user manual for supported options (if any). + +Example setting the visibility timeout (supported by Redis and SQS +transports): + +.. code-block:: python + + BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 18000} # 5 hours + +.. _conf-task-execution: + +Task execution settings +----------------------- + +.. setting:: CELERY_ALWAYS_EAGER + +CELERY_ALWAYS_EAGER +~~~~~~~~~~~~~~~~~~~ + +If this is :const:`True`, all tasks will be executed locally by blocking until +the task returns. ``apply_async()`` and ``Task.delay()`` will return +an :class:`~celery.result.EagerResult` instance, which emulates the API +and behavior of :class:`~celery.result.AsyncResult`, except the result +is already evaluated. + +That is, tasks will be executed locally instead of being sent to +the queue. + +.. setting:: CELERY_EAGER_PROPAGATES_EXCEPTIONS + +CELERY_EAGER_PROPAGATES_EXCEPTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If this is :const:`True`, eagerly executed tasks (applied by `task.apply()`, +or when the :setting:`CELERY_ALWAYS_EAGER` setting is enabled), will +propagate exceptions. + +It's the same as always running ``apply()`` with ``throw=True``. + +.. setting:: CELERY_IGNORE_RESULT + +CELERY_IGNORE_RESULT +~~~~~~~~~~~~~~~~~~~~ + +Whether to store the task return values or not (tombstones). +If you still want to store errors, just not successful return values, +you can set :setting:`CELERY_STORE_ERRORS_EVEN_IF_IGNORED`. + +.. setting:: CELERY_MESSAGE_COMPRESSION + +CELERY_MESSAGE_COMPRESSION +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Default compression used for task messages. +Can be ``gzip``, ``bzip2`` (if available), or any custom +compression schemes registered in the Kombu compression registry. + +The default is to send uncompressed messages. + +.. setting:: CELERY_TASK_RESULT_EXPIRES + +CELERY_TASK_RESULT_EXPIRES +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Time (in seconds, or a :class:`~datetime.timedelta` object) for when after +stored task tombstones will be deleted. 
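+
+As an illustration (one hour is an arbitrary example value), both of these
+forms are accepted:
+
+.. code-block:: python
+
+    from datetime import timedelta
+
+    CELERY_TASK_RESULT_EXPIRES = timedelta(hours=1)
+
+    # or simply a number of seconds:
+    CELERY_TASK_RESULT_EXPIRES = 3600
+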
+ +A built-in periodic task will delete the results after this time +(:class:`celery.task.backend_cleanup`). + +A value of :const:`None` or 0 means results will never expire (depending +on backend specifications). + +Default is to expire after 1 day. + +.. note:: + + For the moment this only works with the amqp, database, cache, redis and MongoDB + backends. + + When using the database or MongoDB backends, `celery beat` must be + running for the results to be expired. + +.. setting:: CELERY_MAX_CACHED_RESULTS + +CELERY_MAX_CACHED_RESULTS +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Result backends caches ready results used by the client. + +This is the total number of results to cache before older results are evicted. +The default is 5000. 0 or None means no limit, and a value of :const:`-1` +will disable the cache. + +.. setting:: CELERY_CHORD_PROPAGATES + +CELERY_CHORD_PROPAGATES +~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 3.0.14 + +This setting defines what happens when a task part of a chord raises an +exception: + +- If propagate is True the chord callback will change state to FAILURE + with the exception value set to a :exc:`~@ChordError` + instance containing information about the error and the task that failed. + + This is the default behavior in Celery 3.1+ + +- If propagate is False the exception value will instead be forwarded + to the chord callback. + + This was the default behavior before version 3.1. + +.. setting:: CELERY_TRACK_STARTED + +CELERY_TRACK_STARTED +~~~~~~~~~~~~~~~~~~~~ + +If :const:`True` the task will report its status as "started" when the +task is executed by a worker. The default value is :const:`False` as +the normal behaviour is to not report that level of granularity. Tasks +are either pending, finished, or waiting to be retried. Having a "started" +state can be useful for when there are long running tasks and there is a +need to report which task is currently running. + +.. setting:: CELERY_TASK_SERIALIZER + +CELERY_TASK_SERIALIZER +~~~~~~~~~~~~~~~~~~~~~~ + +A string identifying the default serialization method to use. Can be +`pickle` (default), `json`, `yaml`, `msgpack` or any custom serialization +methods that have been registered with :mod:`kombu.serialization.registry`. + +.. seealso:: + + :ref:`calling-serializers`. + +.. setting:: CELERY_TASK_PUBLISH_RETRY + +CELERY_TASK_PUBLISH_RETRY +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.2 + +Decides if publishing task messages will be retried in the case +of connection loss or other connection errors. +See also :setting:`CELERY_TASK_PUBLISH_RETRY_POLICY`. + +Enabled by default. + +.. setting:: CELERY_TASK_PUBLISH_RETRY_POLICY + +CELERY_TASK_PUBLISH_RETRY_POLICY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.2 + +Defines the default policy when retrying publishing a task message in +the case of connection loss or other connection errors. + +See :ref:`calling-retry` for more information. + +.. setting:: CELERY_DEFAULT_RATE_LIMIT + +CELERY_DEFAULT_RATE_LIMIT +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The global default rate limit for tasks. + +This value is used for tasks that does not have a custom rate limit +The default is no rate limit. + +.. setting:: CELERY_DISABLE_RATE_LIMITS + +CELERY_DISABLE_RATE_LIMITS +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Disable all rate limits, even if tasks has explicit rate limits set. + +.. 
setting:: CELERY_ACKS_LATE + +CELERY_ACKS_LATE +~~~~~~~~~~~~~~~~ + +Late ack means the task messages will be acknowledged **after** the task +has been executed, not *just before*, which is the default behavior. + +.. seealso:: + + FAQ: :ref:`faq-acks_late-vs-retry`. + +.. _conf-worker: + +Worker +------ + +.. setting:: CELERY_IMPORTS + +CELERY_IMPORTS +~~~~~~~~~~~~~~ + +A sequence of modules to import when the worker starts. + +This is used to specify the task modules to import, but also +to import signal handlers and additional remote control commands, etc. + +The modules will be imported in the original order. + +.. setting:: CELERY_INCLUDE + +CELERY_INCLUDE +~~~~~~~~~~~~~~ + +Exact same semantics as :setting:`CELERY_IMPORTS`, but can be used as a means +to have different import categories. + +The modules in this setting are imported after the modules in +:setting:`CELERY_IMPORTS`. + +.. setting:: CELERYD_FORCE_EXECV + +CELERYD_FORCE_EXECV +~~~~~~~~~~~~~~~~~~~ + +On Unix the prefork pool will fork, so that child processes +start with the same memory as the parent process. + +This can cause problems as there is a known deadlock condition +with pthread locking primitives when `fork()` is combined with threads. + +You should enable this setting if you are experiencing hangs (deadlocks), +especially in combination with time limits or having a max tasks per child limit. + +This option will be enabled by default in a later version. + +This is not a problem on Windows, as it does not have `fork()`. + +.. setting:: CELERYD_WORKER_LOST_WAIT + +CELERYD_WORKER_LOST_WAIT +~~~~~~~~~~~~~~~~~~~~~~~~ + +In some cases a worker may be killed without proper cleanup, +and the worker may have published a result before terminating. +This value specifies how long we wait for any missing results before +raising a :exc:`@WorkerLostError` exception. + +Default is 10.0 + +.. setting:: CELERYD_MAX_TASKS_PER_CHILD + +CELERYD_MAX_TASKS_PER_CHILD +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Maximum number of tasks a pool worker process can execute before +it's replaced with a new one. Default is no limit. + +.. setting:: CELERYD_TASK_TIME_LIMIT + +CELERYD_TASK_TIME_LIMIT +~~~~~~~~~~~~~~~~~~~~~~~ + +Task hard time limit in seconds. The worker processing the task will +be killed and replaced with a new one when this is exceeded. + +.. setting:: CELERYD_TASK_SOFT_TIME_LIMIT + +CELERYD_TASK_SOFT_TIME_LIMIT +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Task soft time limit in seconds. + +The :exc:`~@SoftTimeLimitExceeded` exception will be +raised when this is exceeded. The task can catch this to +e.g. clean up before the hard time limit comes. + +Example: + +.. code-block:: python + + from celery.exceptions import SoftTimeLimitExceeded + + @app.task + def mytask(): + try: + return do_work() + except SoftTimeLimitExceeded: + cleanup_in_a_hurry() + +.. setting:: CELERY_STORE_ERRORS_EVEN_IF_IGNORED + +CELERY_STORE_ERRORS_EVEN_IF_IGNORED +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If set, the worker stores all task errors in the result store even if +:attr:`Task.ignore_result ` is on. + +.. setting:: CELERYD_STATE_DB + +CELERYD_STATE_DB +~~~~~~~~~~~~~~~~ + +Name of the file used to stores persistent worker state (like revoked tasks). +Can be a relative or absolute path, but be aware that the suffix `.db` +may be appended to the file name (depending on Python version). + +Can also be set via the :option:`--statedb` argument to +:mod:`~celery.bin.worker`. + +Not enabled by default. + +.. 
setting:: CELERYD_TIMER_PRECISION + +CELERYD_TIMER_PRECISION +~~~~~~~~~~~~~~~~~~~~~~~ + +Set the maximum time in seconds that the ETA scheduler can sleep between +rechecking the schedule. Default is 1 second. + +Setting this value to 1 second means the schedulers precision will +be 1 second. If you need near millisecond precision you can set this to 0.1. + +.. setting:: CELERY_ENABLE_REMOTE_CONTROL + +CELERY_ENABLE_REMOTE_CONTROL +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Specify if remote control of the workers is enabled. + +Default is :const:`True`. + + +.. _conf-error-mails: + +Error E-Mails +------------- + +.. setting:: CELERY_SEND_TASK_ERROR_EMAILS + +CELERY_SEND_TASK_ERROR_EMAILS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The default value for the `Task.send_error_emails` attribute, which if +set to :const:`True` means errors occurring during task execution will be +sent to :setting:`ADMINS` by email. + +Disabled by default. + +.. setting:: ADMINS + +ADMINS +~~~~~~ + +List of `(name, email_address)` tuples for the administrators that should +receive error emails. + +.. setting:: SERVER_EMAIL + +SERVER_EMAIL +~~~~~~~~~~~~ + +The email address this worker sends emails from. +Default is celery@localhost. + +.. setting:: EMAIL_HOST + +EMAIL_HOST +~~~~~~~~~~ + +The mail server to use. Default is ``localhost``. + +.. setting:: EMAIL_HOST_USER + +EMAIL_HOST_USER +~~~~~~~~~~~~~~~ + +User name (if required) to log on to the mail server with. + +.. setting:: EMAIL_HOST_PASSWORD + +EMAIL_HOST_PASSWORD +~~~~~~~~~~~~~~~~~~~ + +Password (if required) to log on to the mail server with. + +.. setting:: EMAIL_PORT + +EMAIL_PORT +~~~~~~~~~~ + +The port the mail server is listening on. Default is `25`. + + +.. setting:: EMAIL_USE_SSL + +EMAIL_USE_SSL +~~~~~~~~~~~~~ + +Use SSL when connecting to the SMTP server. Disabled by default. + +.. setting:: EMAIL_USE_TLS + +EMAIL_USE_TLS +~~~~~~~~~~~~~ + +Use TLS when connecting to the SMTP server. Disabled by default. + +.. setting:: EMAIL_TIMEOUT + +EMAIL_TIMEOUT +~~~~~~~~~~~~~ + +Timeout in seconds for when we give up trying to connect +to the SMTP server when sending emails. + +The default is 2 seconds. + +.. _conf-example-error-mail-config: + +Example E-Mail configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This configuration enables the sending of error emails to +george@vandelay.com and kramer@vandelay.com: + +.. code-block:: python + + # Enables error emails. + CELERY_SEND_TASK_ERROR_EMAILS = True + + # Name and email addresses of recipients + ADMINS = ( + ('George Costanza', 'george@vandelay.com'), + ('Cosmo Kramer', 'kosmo@vandelay.com'), + ) + + # Email address used as sender (From field). + SERVER_EMAIL = 'no-reply@vandelay.com' + + # Mailserver configuration + EMAIL_HOST = 'mail.vandelay.com' + EMAIL_PORT = 25 + # EMAIL_HOST_USER = 'servers' + # EMAIL_HOST_PASSWORD = 's3cr3t' + +.. _conf-events: + +Events +------ + +.. setting:: CELERY_SEND_EVENTS + +CELERY_SEND_EVENTS +~~~~~~~~~~~~~~~~~~ + +Send events so the worker can be monitored by tools like `celerymon`. + +.. setting:: CELERY_SEND_TASK_SENT_EVENT + +CELERY_SEND_TASK_SENT_EVENT +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.2 + +If enabled, a :event:`task-sent` event will be sent for every task so tasks can be +tracked before they are consumed by a worker. + +Disabled by default. + +.. 
setting:: CELERY_EVENT_QUEUE_TTL + +CELERY_EVENT_QUEUE_TTL +~~~~~~~~~~~~~~~~~~~~~~ +:transports supported: ``amqp`` + +Message expiry time in seconds (int/float) for when messages sent to a monitor clients +event queue is deleted (``x-message-ttl``) + +For example, if this value is set to 10 then a message delivered to this queue +will be deleted after 10 seconds. + +Disabled by default. + +.. setting:: CELERY_EVENT_QUEUE_EXPIRES + +CELERY_EVENT_QUEUE_EXPIRES +~~~~~~~~~~~~~~~~~~~~~~~~~~ +:transports supported: ``amqp`` + + +Expiry time in seconds (int/float) for when a monitor clients +event queue will be deleted (``x-expires``). + +Default is never, relying on the queue autodelete setting. + +.. setting:: CELERY_EVENT_SERIALIZER + +CELERY_EVENT_SERIALIZER +~~~~~~~~~~~~~~~~~~~~~~~ + +Message serialization format used when sending event messages. +Default is ``json``. See :ref:`calling-serializers`. + +.. _conf-broadcast: + +Broadcast Commands +------------------ + +.. setting:: CELERY_BROADCAST_QUEUE + +CELERY_BROADCAST_QUEUE +~~~~~~~~~~~~~~~~~~~~~~ + +Name prefix for the queue used when listening for broadcast messages. +The workers host name will be appended to the prefix to create the final +queue name. + +Default is ``celeryctl``. + +.. setting:: CELERY_BROADCAST_EXCHANGE + +CELERY_BROADCAST_EXCHANGE +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Name of the exchange used for broadcast messages. + +Default is ``celeryctl``. + +.. setting:: CELERY_BROADCAST_EXCHANGE_TYPE + +CELERY_BROADCAST_EXCHANGE_TYPE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Exchange type used for broadcast messages. Default is ``fanout``. + +.. _conf-logging: + +Logging +------- + +.. setting:: CELERYD_HIJACK_ROOT_LOGGER + +CELERYD_HIJACK_ROOT_LOGGER +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.2 + +By default any previously configured handlers on the root logger will be +removed. If you want to customize your own logging handlers, then you +can disable this behavior by setting +`CELERYD_HIJACK_ROOT_LOGGER = False`. + +.. note:: + + Logging can also be customized by connecting to the + :signal:`celery.signals.setup_logging` signal. + +.. setting:: CELERYD_LOG_COLOR + +CELERYD_LOG_COLOR +~~~~~~~~~~~~~~~~~ + +Enables/disables colors in logging output by the Celery apps. + +By default colors are enabled if + + 1) the app is logging to a real terminal, and not a file. + 2) the app is not running on Windows. + +.. setting:: CELERYD_LOG_FORMAT + +CELERYD_LOG_FORMAT +~~~~~~~~~~~~~~~~~~ + +The format to use for log messages. + +Default is `[%(asctime)s: %(levelname)s/%(processName)s] %(message)s` + +See the Python :mod:`logging` module for more information about log +formats. + +.. setting:: CELERYD_TASK_LOG_FORMAT + +CELERYD_TASK_LOG_FORMAT +~~~~~~~~~~~~~~~~~~~~~~~ + +The format to use for log messages logged in tasks. Can be overridden using +the :option:`--loglevel` option to :mod:`~celery.bin.worker`. + +Default is:: + + [%(asctime)s: %(levelname)s/%(processName)s] + [%(task_name)s(%(task_id)s)] %(message)s + +See the Python :mod:`logging` module for more information about log +formats. + +.. setting:: CELERY_REDIRECT_STDOUTS + +CELERY_REDIRECT_STDOUTS +~~~~~~~~~~~~~~~~~~~~~~~ + +If enabled `stdout` and `stderr` will be redirected +to the current logger. + +Enabled by default. +Used by :program:`celery worker` and :program:`celery beat`. + +.. setting:: CELERY_REDIRECT_STDOUTS_LEVEL + +CELERY_REDIRECT_STDOUTS_LEVEL +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The log level output to `stdout` and `stderr` is logged as. 
+Can be one of :const:`DEBUG`, :const:`INFO`, :const:`WARNING`, +:const:`ERROR` or :const:`CRITICAL`. + +Default is :const:`WARNING`. + +.. _conf-security: + +Security +-------- + +.. setting:: CELERY_SECURITY_KEY + +CELERY_SECURITY_KEY +~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.5 + +The relative or absolute path to a file containing the private key +used to sign messages when :ref:`message-signing` is used. + +.. setting:: CELERY_SECURITY_CERTIFICATE + +CELERY_SECURITY_CERTIFICATE +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.5 + +The relative or absolute path to an X.509 certificate file +used to sign messages when :ref:`message-signing` is used. + +.. setting:: CELERY_SECURITY_CERT_STORE + +CELERY_SECURITY_CERT_STORE +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.5 + +The directory containing X.509 certificates used for +:ref:`message-signing`. Can be a glob with wildcards, +(for example :file:`/etc/certs/*.pem`). + +.. _conf-custom-components: + +Custom Component Classes (advanced) +----------------------------------- + +.. setting:: CELERYD_POOL + +CELERYD_POOL +~~~~~~~~~~~~ + +Name of the pool class used by the worker. + +.. admonition:: Eventlet/Gevent + + Never use this option to select the eventlet or gevent pool. + You must use the `-P` option instead, otherwise the monkey patching + will happen too late and things will break in strange and silent ways. + +Default is ``celery.concurrency.prefork:TaskPool``. + +.. setting:: CELERYD_POOL_RESTARTS + +CELERYD_POOL_RESTARTS +~~~~~~~~~~~~~~~~~~~~~ + +If enabled the worker pool can be restarted using the +:control:`pool_restart` remote control command. + +Disabled by default. + +.. setting:: CELERYD_AUTOSCALER + +CELERYD_AUTOSCALER +~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.2 + +Name of the autoscaler class to use. + +Default is ``celery.worker.autoscale:Autoscaler``. + +.. setting:: CELERYD_AUTORELOADER + +CELERYD_AUTORELOADER +~~~~~~~~~~~~~~~~~~~~ + +Name of the autoreloader class used by the worker to reload +Python modules and files that have changed. + +Default is: ``celery.worker.autoreload:Autoreloader``. + +.. setting:: CELERYD_CONSUMER + +CELERYD_CONSUMER +~~~~~~~~~~~~~~~~ + +Name of the consumer class used by the worker. +Default is :class:`celery.worker.consumer.Consumer` + +.. setting:: CELERYD_TIMER + +CELERYD_TIMER +~~~~~~~~~~~~~~~~~~~~~ + +Name of the ETA scheduler class used by the worker. +Default is :class:`celery.utils.timer2.Timer`, or one overrided +by the pool implementation. + +.. _conf-celerybeat: + +Periodic Task Server: celery beat +--------------------------------- + +.. setting:: CELERYBEAT_SCHEDULE + +CELERYBEAT_SCHEDULE +~~~~~~~~~~~~~~~~~~~ + +The periodic task schedule used by :mod:`~celery.bin.beat`. +See :ref:`beat-entries`. + +.. setting:: CELERYBEAT_SCHEDULER + +CELERYBEAT_SCHEDULER +~~~~~~~~~~~~~~~~~~~~ + +The default scheduler class. Default is ``celery.beat:PersistentScheduler``. + +Can also be set via the :option:`-S` argument to +:mod:`~celery.bin.beat`. + +.. setting:: CELERYBEAT_SCHEDULE_FILENAME + +CELERYBEAT_SCHEDULE_FILENAME +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Name of the file used by `PersistentScheduler` to store the last run times +of periodic tasks. Can be a relative or absolute path, but be aware that the +suffix `.db` may be appended to the file name (depending on Python version). + +Can also be set via the :option:`--schedule` argument to +:mod:`~celery.bin.beat`. + +.. 
setting:: CELERYBEAT_SYNC_EVERY + +CELERYBEAT_SYNC_EVERY +~~~~~~~~~~~~~~~~~~~~~ + +The number of periodic tasks that can be called before another database sync +is issued. +Defaults to 0 (sync based on timing - default of 3 minutes as determined by +scheduler.sync_every). If set to 1, beat will call sync after every task +message sent. + +.. setting:: CELERYBEAT_MAX_LOOP_INTERVAL + +CELERYBEAT_MAX_LOOP_INTERVAL +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The maximum number of seconds :mod:`~celery.bin.beat` can sleep +between checking the schedule. + +The default for this value is scheduler specific. +For the default celery beat scheduler the value is 300 (5 minutes), +but for e.g. the django-celery database scheduler it is 5 seconds +because the schedule may be changed externally, and so it must take +changes to the schedule into account. + +Also when running celery beat embedded (:option:`-B`) on Jython as a thread +the max interval is overridden and set to 1 so that it's possible +to shut down in a timely manner. + + +.. _conf-celerymon: + +Monitor Server: celerymon +------------------------- + + +.. setting:: CELERYMON_LOG_FORMAT + +CELERYMON_LOG_FORMAT +~~~~~~~~~~~~~~~~~~~~ + +The format to use for log messages. + +Default is `[%(asctime)s: %(levelname)s/%(processName)s] %(message)s` + +See the Python :mod:`logging` module for more information about log +formats. diff --git a/docs/contributing.rst b/docs/contributing.rst new file mode 100644 index 0000000..0bb7693 --- /dev/null +++ b/docs/contributing.rst @@ -0,0 +1,1086 @@ +.. _contributing: + +============== + Contributing +============== + +Welcome! + +This document is fairly extensive and you are not really expected +to study this in detail for small contributions; + + The most important rule is that contributing must be easy + and that the community is friendly and not nitpicking on details + such as coding style. + +If you're reporting a bug you should read the Reporting bugs section +below to ensure that your bug report contains enough information +to successfully diagnose the issue, and if you're contributing code +you should try to mimic the conventions you see surrounding the code +you are working on, but in the end all patches will be cleaned up by +the person merging the changes so don't worry too much. + +.. contents:: + :local: + +.. _community-code-of-conduct: + +Community Code of Conduct +========================= + +The goal is to maintain a diverse community that is pleasant for everyone. +That is why we would greatly appreciate it if everyone contributing to and +interacting with the community also followed this Code of Conduct. + +The Code of Conduct covers our behavior as members of the community, +in any forum, mailing list, wiki, website, Internet relay chat (IRC), public +meeting or private correspondence. + +The Code of Conduct is heavily based on the `Ubuntu Code of Conduct`_, and +the `Pylons Code of Conduct`_. + +.. _`Ubuntu Code of Conduct`: http://www.ubuntu.com/community/conduct +.. _`Pylons Code of Conduct`: http://docs.pylonshq.com/community/conduct.html + +Be considerate. +--------------- + +Your work will be used by other people, and you in turn will depend on the +work of others. Any decision you take will affect users and colleagues, and +we expect you to take those consequences into account when making decisions. +Even if it's not obvious at the time, our contributions to Celery will impact +the work of others. 
For example, changes to code, infrastructure, policy, +documentation and translations during a release may negatively impact +others work. + +Be respectful. +-------------- + +The Celery community and its members treat one another with respect. Everyone +can make a valuable contribution to Celery. We may not always agree, but +disagreement is no excuse for poor behavior and poor manners. We might all +experience some frustration now and then, but we cannot allow that frustration +to turn into a personal attack. It's important to remember that a community +where people feel uncomfortable or threatened is not a productive one. We +expect members of the Celery community to be respectful when dealing with +other contributors as well as with people outside the Celery project and with +users of Celery. + +Be collaborative. +----------------- + +Collaboration is central to Celery and to the larger free software community. +We should always be open to collaboration. Your work should be done +transparently and patches from Celery should be given back to the community +when they are made, not just when the distribution releases. If you wish +to work on new code for existing upstream projects, at least keep those +projects informed of your ideas and progress. It many not be possible to +get consensus from upstream, or even from your colleagues about the correct +implementation for an idea, so don't feel obliged to have that agreement +before you begin, but at least keep the outside world informed of your work, +and publish your work in a way that allows outsiders to test, discuss and +contribute to your efforts. + +When you disagree, consult others. +---------------------------------- + +Disagreements, both political and technical, happen all the time and +the Celery community is no exception. It is important that we resolve +disagreements and differing views constructively and with the help of the +community and community process. If you really want to go a different +way, then we encourage you to make a derivative distribution or alternate +set of packages that still build on the work we've done to utilize as common +of a core as possible. + +When you are unsure, ask for help. +---------------------------------- + +Nobody knows everything, and nobody is expected to be perfect. Asking +questions avoids many problems down the road, and so questions are +encouraged. Those who are asked questions should be responsive and helpful. +However, when asking a question, care must be taken to do so in an appropriate +forum. + +Step down considerately. +------------------------ + +Developers on every project come and go and Celery is no different. When you +leave or disengage from the project, in whole or in part, we ask that you do +so in a way that minimizes disruption to the project. This means you should +tell people you are leaving and take the proper steps to ensure that others +can pick up where you leave off. + +.. _reporting-bugs: + + +Reporting Bugs +============== + +.. _vulnsec: + +Security +-------- + +You must never report security related issues, vulnerabilities or bugs +including sensitive information to the bug tracker, or elsewhere in public. +Instead sensitive bugs must be sent by email to ``security@celeryproject.org``. 
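+
+For example, assuming the public key below has been saved to a local file
+(the file name here is hypothetical) and that the standard ``gpg`` tool is
+available, an encrypted report could be prepared roughly like this:
+
+.. code-block:: bash
+
+    $ gpg --import celery-security.asc
+    $ gpg --armor --encrypt --recipient security@celeryproject.org report.txt
+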
+ +If you'd like to submit the information encrypted our PGP key is:: + + -----BEGIN PGP PUBLIC KEY BLOCK----- + Version: GnuPG v1.4.15 (Darwin) + + mQENBFJpWDkBCADFIc9/Fpgse4owLNvsTC7GYfnJL19XO0hnL99sPx+DPbfr+cSE + 9wiU+Wp2TfUX7pCLEGrODiEP6ZCZbgtiPgId+JYvMxpP6GXbjiIlHRw1EQNH8RlX + cVxy3rQfVv8PGGiJuyBBjxzvETHW25htVAZ5TI1+CkxmuyyEYqgZN2fNd0wEU19D + +c10G1gSECbCQTCbacLSzdpngAt1Gkrc96r7wGHBBSvDaGDD2pFSkVuTLMbIRrVp + lnKOPMsUijiip2EMr2DvfuXiUIUvaqInTPNWkDynLoh69ib5xC19CSVLONjkKBsr + Pe+qAY29liBatatpXsydY7GIUzyBT3MzgMJlABEBAAG0MUNlbGVyeSBTZWN1cml0 + eSBUZWFtIDxzZWN1cml0eUBjZWxlcnlwcm9qZWN0Lm9yZz6JATgEEwECACIFAlJp + WDkCGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEOArFOUDCicIw1IH/26f + CViDC7/P13jr+srRdjAsWvQztia9HmTlY8cUnbmkR9w6b6j3F2ayw8VhkyFWgYEJ + wtPBv8mHKADiVSFARS+0yGsfCkia5wDSQuIv6XqRlIrXUyqJbmF4NUFTyCZYoh+C + ZiQpN9xGhFPr5QDlMx2izWg1rvWlG1jY2Es1v/xED3AeCOB1eUGvRe/uJHKjGv7J + rj0pFcptZX+WDF22AN235WYwgJM6TrNfSu8sv8vNAQOVnsKcgsqhuwomSGsOfMQj + LFzIn95MKBBU1G5wOs7JtwiV9jefGqJGBO2FAvOVbvPdK/saSnB+7K36dQcIHqms + 5hU4Xj0RIJiod5idlRC5AQ0EUmlYOQEIAJs8OwHMkrdcvy9kk2HBVbdqhgAREMKy + gmphDp7prRL9FqSY/dKpCbG0u82zyJypdb7QiaQ5pfPzPpQcd2dIcohkkh7G3E+e + hS2L9AXHpwR26/PzMBXyr2iNnNc4vTksHvGVDxzFnRpka6vbI/hrrZmYNYh9EAiv + uhE54b3/XhXwFgHjZXb9i8hgJ3nsO0pRwvUAM1bRGMbvf8e9F+kqgV0yWYNnh6QL + 4Vpl1+epqp2RKPHyNQftbQyrAHXT9kQF9pPlx013MKYaFTADscuAp4T3dy7xmiwS + crqMbZLzfrxfFOsNxTUGE5vmJCcm+mybAtRo4aV6ACohAO9NevMx8pUAEQEAAYkB + HwQYAQIACQUCUmlYOQIbDAAKCRDgKxTlAwonCNFbB/9esir/f7TufE+isNqErzR/ + aZKZo2WzZR9c75kbqo6J6DYuUHe6xI0OZ2qZ60iABDEZAiNXGulysFLCiPdatQ8x + 8zt3DF9BMkEck54ZvAjpNSern6zfZb1jPYWZq3TKxlTs/GuCgBAuV4i5vDTZ7xK/ + aF+OFY5zN7ciZHkqLgMiTZ+RhqRcK6FhVBP/Y7d9NlBOcDBTxxE1ZO1ute6n7guJ + ciw4hfoRk8qNN19szZuq3UU64zpkM2sBsIFM9tGF2FADRxiOaOWZHmIyVZriPFqW + RUwjSjs7jBVNq0Vy4fCu/5+e+XLOUBOoqtM5W7ELt0t1w9tXebtPEetV86in8fU2 + =0chn + -----END PGP PUBLIC KEY BLOCK----- + +Other bugs +---------- + +Bugs can always be described to the :ref:`mailing-list`, but the best +way to report an issue and to ensure a timely response is to use the +issue tracker. + +1) **Create a GitHub account.** + +You need to `create a GitHub account`_ to be able to create new issues +and participate in the discussion. + +.. _`create a GitHub account`: https://github.com/signup/free + +2) **Determine if your bug is really a bug.** + +You should not file a bug if you are requesting support. For that you can use +the :ref:`mailing-list`, or :ref:`irc-channel`. + +3) **Make sure your bug hasn't already been reported.** + +Search through the appropriate Issue tracker. If a bug like yours was found, +check if you have new information that could be reported to help +the developers fix the bug. + +4) **Check if you're using the latest version.** + +A bug could be fixed by some other improvements and fixes - it might not have an +existing report in the bug tracker. Make sure you're using the latest releases of +celery, billiard and kombu. + +5) **Collect information about the bug.** + +To have the best chance of having a bug fixed, we need to be able to easily +reproduce the conditions that caused it. Most of the time this information +will be from a Python traceback message, though some bugs might be in design, +spelling or other errors on the website/docs/code. + + A) If the error is from a Python traceback, include it in the bug report. 
+ + B) We also need to know what platform you're running (Windows, OS X, Linux, + etc.), the version of your Python interpreter, and the version of Celery, + and related packages that you were running when the bug occurred. + + C) If you are reporting a race condition or a deadlock, tracebacks can be + hard to get or might not be that useful. Try to inspect the process to + get more diagnostic data. Some ideas: + + * Enable celery's :ref:`breakpoint signal ` and use it + to inspect the process's state. This will allow you to open a + :mod:`pdb` session. + * Collect tracing data using strace_(Linux), dtruss (OSX) and ktrace(BSD), + ltrace_ and lsof_. + + D) Include the output from the `celery report` command: + + .. code-block:: bash + + $ celery -A proj report + + This will also include your configuration settings and it try to + remove values for keys known to be sensitive, but make sure you also + verify the information before submitting so that it doesn't contain + confidential information like API tokens and authentication + credentials. + +6) **Submit the bug.** + +By default `GitHub`_ will email you to let you know when new comments have +been made on your bug. In the event you've turned this feature off, you +should check back on occasion to ensure you don't miss any questions a +developer trying to fix the bug might ask. + +.. _`GitHub`: http://github.com +.. _`strace`: http://en.wikipedia.org/wiki/Strace +.. _`ltrace`: http://en.wikipedia.org/wiki/Ltrace +.. _`lsof`: http://en.wikipedia.org/wiki/Lsof + +.. _issue-trackers: + +Issue Trackers +-------------- + +Bugs for a package in the Celery ecosystem should be reported to the relevant +issue tracker. + +* Celery: http://github.com/celery/celery/issues/ +* Kombu: http://github.com/celery/kombu/issues +* pyamqp: http://github.com/celery/pyamqp/issues +* librabbitmq: http://github.com/celery/librabbitmq/issues +* Django-Celery: http://github.com/celery/django-celery/issues + +If you are unsure of the origin of the bug you can ask the +:ref:`mailing-list`, or just use the Celery issue tracker. + +Contributors guide to the codebase +================================== + +There's a separate section for internal details, +including details about the codebase and a style guide. + +Read :ref:`internals-guide` for more! + +.. _versions: + +Versions +======== + +Version numbers consists of a major version, minor version and a release number. +Since version 2.1.0 we use the versioning semantics described by +semver: http://semver.org. + +Stable releases are published at PyPI +while development releases are only available in the GitHub git repository as tags. +All version tags starts with “v”, so version 0.8.0 is the tag v0.8.0. + +.. _git-branches: + +Branches +======== + +Current active version branches: + +* master (http://github.com/celery/celery/tree/master) +* 3.1 (http://github.com/celery/celery/tree/3.1) +* 3.0 (http://github.com/celery/celery/tree/3.0) + +You can see the state of any branch by looking at the Changelog: + + https://github.com/celery/celery/blob/master/Changelog + +If the branch is in active development the topmost version info should +contain metadata like:: + + 2.4.0 + ====== + :release-date: TBA + :status: DEVELOPMENT + :branch: master + +The ``status`` field can be one of: + +* ``PLANNING`` + + The branch is currently experimental and in the planning stage. 
+ +* ``DEVELOPMENT`` + + The branch is in active development, but the test suite should + be passing and the product should be working and possible for users to test. + +* ``FROZEN`` + + The branch is frozen, and no more features will be accepted. + When a branch is frozen the focus is on testing the version as much + as possible before it is released. + +``master`` branch +----------------- + +The master branch is where development of the next version happens. + +Maintenance branches +-------------------- + +Maintenance branches are named after the version, e.g. the maintenance branch +for the 2.2.x series is named ``2.2``. Previously these were named +``releaseXX-maint``. + +The versions we currently maintain is: + +* 3.1 + + This is the current series. + +* 3.0 + + This is the previous series, and the last version to support Python 2.5. + +Archived branches +----------------- + +Archived branches are kept for preserving history only, +and theoretically someone could provide patches for these if they depend +on a series that is no longer officially supported. + +An archived version is named ``X.Y-archived``. + +Our currently archived branches are: + +* 2.5-archived + +* 2.4-archived + +* 2.3-archived + +* 2.1-archived + +* 2.0-archived + +* 1.0-archived + +Feature branches +---------------- + +Major new features are worked on in dedicated branches. +There is no strict naming requirement for these branches. + +Feature branches are removed once they have been merged into a release branch. + +Tags +==== + +Tags are used exclusively for tagging releases. A release tag is +named with the format ``vX.Y.Z``, e.g. ``v2.3.1``. +Experimental releases contain an additional identifier ``vX.Y.Z-id``, e.g. +``v3.0.0-rc1``. Experimental tags may be removed after the official release. + +.. _contributing-changes: + +Working on Features & Patches +============================= + +.. note:: + + Contributing to Celery should be as simple as possible, + so none of these steps should be considered mandatory. + + You can even send in patches by email if that is your preferred + work method. We won't like you any less, any contribution you make + is always appreciated! + + However following these steps may make maintainers life easier, + and may mean that your changes will be accepted sooner. + +Forking and setting up the repository +------------------------------------- + +First you need to fork the Celery repository, a good introduction to this +is in the Github Guide: `Fork a Repo`_. + +After you have cloned the repository you should checkout your copy +to a directory on your machine: + +.. code-block:: bash + + $ git clone git@github.com:username/celery.git + +When the repository is cloned enter the directory to set up easy access +to upstream changes: + +.. code-block:: bash + + $ cd celery + $ git remote add upstream git://github.com/celery/celery.git + $ git fetch upstream + +If you need to pull in new changes from upstream you should +always use the :option:`--rebase` option to ``git pull``: + +.. code-block:: bash + + git pull --rebase upstream master + +With this option you don't clutter the history with merging +commit notes. See `Rebasing merge commits in git`_. +If you want to learn more about rebasing see the `Rebase`_ +section in the Github guides. + +If you need to work on a different branch than ``master`` you can +fetch and checkout a remote branch like this:: + + git checkout --track -b 3.0-devel origin/3.0-devel + +.. _`Fork a Repo`: http://help.github.com/fork-a-repo/ +.. 
_`Rebasing merge commits in git`: + http://notes.envato.com/developers/rebasing-merge-commits-in-git/ +.. _`Rebase`: http://help.github.com/rebase/ + +.. _contributing-testing: + +Running the unit test suite +--------------------------- + +To run the Celery test suite you need to install a few dependencies. +A complete list of the dependencies needed are located in +:file:`requirements/test.txt`. + +Installing the test requirements: + +.. code-block:: bash + + $ pip install -U -r requirements/test.txt + +When installation of dependencies is complete you can execute +the test suite by calling ``nosetests``: + +.. code-block:: bash + + $ nosetests + +Some useful options to :program:`nosetests` are: + +* :option:`-x` + + Stop running the tests at the first test that fails. + +* :option:`-s` + + Don't capture output + +* :option:`--nologcapture` + + Don't capture log output. + +* :option:`-v` + + Run with verbose output. + +If you want to run the tests for a single test file only +you can do so like this: + +.. code-block:: bash + + $ nosetests celery.tests.test_worker.test_worker_job + +.. _contributing-pull-requests: + +Creating pull requests +---------------------- + +When your feature/bugfix is complete you may want to submit +a pull requests so that it can be reviewed by the maintainers. + +Creating pull requests is easy, and also let you track the progress +of your contribution. Read the `Pull Requests`_ section in the Github +Guide to learn how this is done. + +You can also attach pull requests to existing issues by following +the steps outlined here: http://bit.ly/koJoso + +.. _`Pull Requests`: http://help.github.com/send-pull-requests/ + +.. _contributing-coverage: + +Calculating test coverage +~~~~~~~~~~~~~~~~~~~~~~~~~ + +To calculate test coverage you must first install the :mod:`coverage` module. + +Installing the :mod:`coverage` module: + +.. code-block:: bash + + $ pip install -U coverage + +Code coverage in HTML: + +.. code-block:: bash + + $ nosetests --with-coverage --cover-html + +The coverage output will then be located at +:file:`celery/tests/cover/index.html`. + +Code coverage in XML (Cobertura-style): + +.. code-block:: bash + + $ nosetests --with-coverage --cover-xml --cover-xml-file=coverage.xml + +The coverage XML output will then be located at :file:`coverage.xml` + +.. _contributing-tox: + +Running the tests on all supported Python versions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There is a ``tox`` configuration file in the top directory of the +distribution. + +To run the tests for all supported Python versions simply execute: + +.. code-block:: bash + + $ tox + +If you only want to test specific Python versions use the :option:`-e` +option: + +.. code-block:: bash + + $ tox -e py26 + +Building the documentation +-------------------------- + +To build the documentation you need to install the dependencies +listed in :file:`requirements/docs.txt`: + +.. code-block:: bash + + $ pip install -U -r requirements/docs.txt + +After these dependencies are installed you should be able to +build the docs by running: + +.. code-block:: bash + + $ cd docs + $ rm -rf .build + $ make html + +Make sure there are no errors or warnings in the build output. +After building succeeds the documentation is available at :file:`.build/html`. + +.. _contributing-verify: + +Verifying your contribution +--------------------------- + +To use these tools you need to install a few dependencies. These dependencies +can be found in :file:`requirements/pkgutils.txt`. 
+ +Installing the dependencies: + +.. code-block:: bash + + $ pip install -U -r requirements/pkgutils.txt + +pyflakes & PEP8 +~~~~~~~~~~~~~~~ + +To ensure that your changes conform to PEP8 and to run pyflakes +execute: + +.. code-block:: bash + + $ paver flake8 + +To not return a negative exit code when this command fails use the +:option:`-E` option, this can be convenient while developing: + +.. code-block:: bash + + $ paver flake8 -E + +API reference +~~~~~~~~~~~~~ + +To make sure that all modules have a corresponding section in the API +reference please execute: + +.. code-block:: bash + + $ paver autodoc + $ paver verifyindex + +If files are missing you can add them by copying an existing reference file. + +If the module is internal it should be part of the internal reference +located in :file:`docs/internals/reference/`. If the module is public +it should be located in :file:`docs/reference/`. + +For example if reference is missing for the module ``celery.worker.awesome`` +and this module is considered part of the public API, use the following steps: + + +Use an existing file as a template: + +.. code-block:: bash + + $ cd docs/reference/ + $ cp celery.schedules.rst celery.worker.awesome.rst + +Edit the file using your favorite editor: + +.. code-block:: bash + + $ vim celery.worker.awesome.rst + + # change every occurance of ``celery.schedules`` to + # ``celery.worker.awesome`` + + +Edit the index using your favorite editor: + +.. code-block:: bash + + $ vim index.rst + + # Add ``celery.worker.awesome`` to the index. + + +Commit your changes: + +.. code-block:: bash + + # Add the file to git + $ git add celery.worker.awesome.rst + $ git add index.rst + $ git commit celery.worker.awesome.rst index.rst \ + -m "Adds reference for celery.worker.awesome" + +.. _coding-style: + +Coding Style +============ + +You should probably be able to pick up the coding style +from surrounding code, but it is a good idea to be aware of the +following conventions. + +* All Python code must follow the `PEP-8`_ guidelines. + +`pep8.py`_ is an utility you can use to verify that your code +is following the conventions. + +.. _`PEP-8`: http://www.python.org/dev/peps/pep-0008/ +.. _`pep8.py`: http://pypi.python.org/pypi/pep8 + +* Docstrings must follow the `PEP-257`_ conventions, and use the following + style. + + Do this: + + .. code-block:: python + + def method(self, arg): + """Short description. + + More details. + + """ + + or: + + .. code-block:: python + + def method(self, arg): + """Short description.""" + + + but not this: + + .. code-block:: python + + def method(self, arg): + """ + Short description. + """ + +.. _`PEP-257`: http://www.python.org/dev/peps/pep-0257/ + +* Lines should not exceed 78 columns. + + You can enforce this in :program:`vim` by setting the ``textwidth`` option: + + .. code-block:: vim + + set textwidth=78 + + If adhering to this limit makes the code less readable, you have one more + character to go on, which means 78 is a soft limit, and 79 is the hard + limit :) + +* Import order + + * Python standard library (`import xxx`) + * Python standard library ('from xxx import`) + * Third party packages. + * Other modules from the current package. + + or in case of code using Django: + + * Python standard library (`import xxx`) + * Python standard library ('from xxx import`) + * Third party packages. + * Django packages. + * Other modules from the current package. + + Within these sections the imports should be sorted by module name. + + Example: + + .. 
code-block:: python

      import threading
      import time

      from collections import deque
      from Queue import Queue, Empty

      from .datastructures import TokenBucket
      from .five import zip_longest, items, range
      from .utils import timeutils

* Wildcard imports must not be used (`from xxx import *`).

* For distributions where Python 2.5 is the oldest supported version,
  additional rules apply:

  * Absolute imports must be enabled at the top of every module::

        from __future__ import absolute_import

  * If the module uses the with statement and must be compatible
    with Python 2.5 (celery is not) then it must also enable that::

        from __future__ import with_statement

  * Every future import must be on its own line, as older Python 2.5
    releases did not support importing multiple features on the
    same future import line::

        # Good
        from __future__ import absolute_import
        from __future__ import with_statement

        # Bad
        from __future__ import absolute_import, with_statement

  (Note that this rule does not apply if the package does not include
  support for Python 2.5)


* Note that we use "new-style" relative imports when the distribution
  does not support Python versions below 2.5.

  This requires Python 2.5 or later:

  .. code-block:: python

      from . import submodule


.. _feature-with-extras:

Contributing features requiring additional libraries
=====================================================

Some features, like a new result backend, may require additional libraries
that the user must install.

We use setuptools ``extras_require`` for this, and all new optional features
that require third-party libraries must use this mechanism.

1) Add a new requirements file in `requirements/extras`

   E.g. for the Cassandra backend this is
   :file:`requirements/extras/cassandra.txt`, and the file looks like this::

       pycassa

   These are pip requirement files, so you can use version specifiers and
   separate multiple packages with newlines. A more complex example could
   be::

       # pycassa 2.0 breaks Foo
       pycassa>=1.0,<2.0
       thrift

2) Modify ``setup.py``

   After the requirements file is added you need to add it as an option
   to ``setup.py`` in the ``extras_require`` section::

       extra['extras_require'] = {
           # ...
           'cassandra': extras('cassandra.txt'),
       }

3) Document the new feature in ``docs/includes/installation.txt``

   You must add your feature to the list in the :ref:`bundles` section
   of :file:`docs/includes/installation.txt`.

   After you've made changes to this file you need to render
   the distro :file:`README` file:

   .. code-block:: bash

       $ pip install -U -r requirements/pkgutils.txt
       $ paver readme


That's all that needs to be done, but remember that if your feature
adds additional configuration options then these need to be documented
in ``docs/configuration.rst``. Also all settings need to be added to the
``celery/app/defaults.py`` module.

Result backends require a separate section in the ``docs/configuration.rst``
file.

.. _contact_information:

Contacts
========

This is a list of people that can be contacted for questions
regarding the official git repositories, PyPI packages, and
Read the Docs pages.

If the issue is not an emergency then it is better
to :ref:`report an issue <reporting-bugs>`.
+ + +Committers +---------- + +Ask Solem +~~~~~~~~~ + +:github: https://github.com/ask +:twitter: http://twitter.com/#!/asksol + +Mher Movsisyan +~~~~~~~~~~~~~~ + +:github: https://github.com/mher +:twitter: http://twitter.com/#!/movsm + +Steeve Morin +~~~~~~~~~~~~ + +:github: https://github.com/steeve +:twitter: http://twitter.com/#!/steeve + +Website +------- + +The Celery Project website is run and maintained by + +Mauro Rocco +~~~~~~~~~~~ + +:github: https://github.com/fireantology +:twitter: https://twitter.com/#!/fireantology + +with design by: + +Jan Henrik Helmers +~~~~~~~~~~~~~~~~~~ + +:web: http://www.helmersworks.com +:twitter: http://twitter.com/#!/helmers + + +.. _packages: + +Packages +======== + +celery +------ + +:git: https://github.com/celery/celery +:CI: http://travis-ci.org/#!/celery/celery +:PyPI: http://pypi.python.org/pypi/celery +:docs: http://docs.celeryproject.org + +kombu +----- + +Messaging library. + +:git: https://github.com/celery/kombu +:CI: http://travis-ci.org/#!/celery/kombu +:PyPI: http://pypi.python.org/pypi/kombu +:docs: http://kombu.readthedocs.org + +amqp +---- + +Python AMQP 0.9.1 client. + +:git: https://github.com/celery/py-amqp +:CI: http://travis-ci.org/#!/celery/py-amqp +:PyPI: http://pypi.python.org/pypi/amqp +:docs: http://amqp.readthedocs.org + +billiard +-------- + +Fork of multiprocessing containing improvements +that will eventually be merged into the Python stdlib. + +:git: https://github.com/celery/billiard +:PyPI: http://pypi.python.org/pypi/billiard + +librabbitmq +----------- + +Very fast Python AMQP client written in C. + +:git: https://github.com/celery/librabbitmq +:PyPI: http://pypi.python.org/pypi/librabbitmq + +celerymon +--------- + +Celery monitor web-service. + +:git: https://github.com/celery/celerymon +:PyPI: http://pypi.python.org/pypi/celerymon + +django-celery +------------- + +Django <-> Celery Integration. + +:git: https://github.com/celery/django-celery +:PyPI: http://pypi.python.org/pypi/django-celery +:docs: http://docs.celeryproject.org/en/latest/django + +cl +-- + +Actor library. + +:git: https://github.com/celery/cl +:PyPI: http://pypi.python.org/pypi/cl + +cyme +---- + +Distributed Celery Instance manager. + +:git: https://github.com/celery/cyme +:PyPI: http://pypi.python.org/pypi/cyme +:docs: http://cyme.readthedocs.org/ + + +Deprecated +---------- + +- Flask-Celery + +:git: https://github.com/ask/Flask-Celery +:PyPI: http://pypi.python.org/pypi/Flask-Celery + +- carrot + +:git: https://github.com/ask/carrot +:PyPI: http://pypi.python.org/pypi/carrot + +- ghettoq + +:git: https://github.com/ask/ghettoq +:PyPI: http://pypi.python.org/pypi/ghettoq + +- kombu-sqlalchemy + +:git: https://github.com/ask/kombu-sqlalchemy +:PyPI: http://pypi.python.org/pypi/kombu-sqlalchemy + +- django-kombu + +:git: https://github.com/ask/django-kombu +:PyPI: http://pypi.python.org/pypi/django-kombu + +- pylibrabbitmq + +Old name for :mod:`librabbitmq`. + +:git: :const:`None` +:PyPI: http://pypi.python.org/pypi/pylibrabbitmq + +.. _release-procedure: + + +Release Procedure +================= + +Updating the version number +--------------------------- + +The version number must be updated two places: + + * :file:`celery/__init__.py` + * :file:`docs/include/introduction.txt` + +After you have changed these files you must render +the :file:`README` files. There is a script to convert sphinx syntax +to generic reStructured Text syntax, and the paver task `readme` +does this for you: + +.. 
code-block:: bash + + $ paver readme + +Now commit the changes: + +.. code-block:: bash + + $ git commit -a -m "Bumps version to X.Y.Z" + +and make a new version tag: + +.. code-block:: bash + + $ git tag vX.Y.Z + $ git push --tags + +Releasing +--------- + +Commands to make a new public stable release:: + + $ paver releaseok # checks pep8, autodoc index, runs tests and more + $ paver removepyc # Remove .pyc files + $ git clean -xdn # Check that there's no left-over files in the repo + $ python setup.py sdist upload # Upload package to PyPI + +If this is a new release series then you also need to do the +following: + +* Go to the Read The Docs management interface at: + http://readthedocs.org/projects/celery/?fromdocs=celery + +* Enter "Edit project" + + Change default branch to the branch of this series, e.g. ``2.4`` + for series 2.4. + +* Also add the previous version under the "versions" tab. diff --git a/docs/copyright.rst b/docs/copyright.rst new file mode 100644 index 0000000..bfffb30 --- /dev/null +++ b/docs/copyright.rst @@ -0,0 +1,27 @@ +Copyright +========= + +*Celery User Manual* + +by Ask Solem + +.. |copy| unicode:: U+000A9 .. COPYRIGHT SIGN + +Copyright |copy| 2009-2014, Ask Solem. + +All rights reserved. This material may be copied or distributed only +subject to the terms and conditions set forth in the `Creative Commons +Attribution-Noncommercial-Share Alike 3.0 United States License +`_. You must +give the original author credit. You may not use this work for +commercial purposes. If you alter, transform, or build upon this +work, you may distribute the resulting work only under the same or +similar license to this one. + +.. note:: + + While the *Celery* documentation is offered under the + Creative Commons *attribution-nonconmmercial-share alike 3.0 united + states* license, the Celery *software* is offered under the + less restrictive + `BSD License (3 Clause) `_ diff --git a/docs/django/first-steps-with-django.rst b/docs/django/first-steps-with-django.rst new file mode 100644 index 0000000..e25022e --- /dev/null +++ b/docs/django/first-steps-with-django.rst @@ -0,0 +1,231 @@ +.. _django-first-steps: + +========================= + First steps with Django +========================= + +Using Celery with Django +======================== + +.. note:: + + Previous versions of Celery required a separate library to work with Django, + but since 3.1 this is no longer the case. Django is supported out of the + box now so this document only contains a basic way to integrate Celery and + Django. You will use the same API as non-Django users so it's recommended that + you read the :ref:`first-steps` tutorial + first and come back to this tutorial. When you have a working example you can + continue to the :ref:`next-steps` guide. + +To use Celery with your Django project you must first define +an instance of the Celery library (called an "app") + +If you have a modern Django project layout like:: + + - proj/ + - proj/__init__.py + - proj/settings.py + - proj/urls.py + - manage.py + +then the recommended way is to create a new `proj/proj/celery.py` module +that defines the Celery instance: + +:file: `proj/proj/celery.py` + +.. literalinclude:: ../../examples/django/proj/celery.py + +Then you need to import this app in your :file:`proj/proj/__init__.py` +module. This ensures that the app is loaded when Django starts +so that the ``@shared_task`` decorator (mentioned later) will use it: + +:file:`proj/proj/__init__.py`: + +.. 
literalinclude:: ../../examples/django/proj/__init__.py + +Note that this example project layout is suitable for larger projects, +for simple projects you may use a single contained module that defines +both the app and tasks, like in the :ref:`tut-celery` tutorial. + +Let's break down what happens in the first module, +first we import absolute imports from the future, so that our +``celery.py`` module will not clash with the library: + +.. code-block:: python + + from __future__ import absolute_import + +Then we set the default :envvar:`DJANGO_SETTINGS_MODULE` +for the :program:`celery` command-line program: + +.. code-block:: python + + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'proj.settings') + +You don't need this line, but it saves you from always passing in the +settings module to the celery program. It must always come before +creating the app instances, which is what we do next: + +.. code-block:: python + + app = Celery('proj') + +This is our instance of the library, you can have many instances +but there's probably no reason for that when using Django. + +We also add the Django settings module as a configuration source +for Celery. This means that you don't have to use multiple +configuration files, and instead configure Celery directly +from the Django settings. + +You can pass the object directly here, but using a string is better since +then the worker doesn't have to serialize the object when using Windows +or execv: + +.. code-block:: python + + app.config_from_object('django.conf:settings') + +Next, a common practice for reusable apps is to define all tasks +in a separate ``tasks.py`` module, and Celery does have a way to +autodiscover these modules: + +.. code-block:: python + + app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) + +With the line above Celery will automatically discover tasks in reusable +apps if you follow the ``tasks.py`` convention:: + + - app1/ + - app1/tasks.py + - app1/models.py + - app2/ + - app2/tasks.py + - app2/models.py + +This way you do not have to manually add the individual modules +to the :setting:`CELERY_IMPORTS` setting. The ``lambda`` so that the +autodiscovery can happen only when needed, and so that importing your +module will not evaluate the Django settings object. + +Finally, the ``debug_task`` example is a task that dumps +its own request information. This is using the new ``bind=True`` task option +introduced in Celery 3.1 to easily refer to the current task instance. + +Using the ``@shared_task`` decorator +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The tasks you write will probably live in reusable apps, and reusable +apps cannot depend on the project itself, so you also cannot import your app +instance directly. + +The ``@shared_task`` decorator lets you create tasks without having any +concrete app instance: + +:file:`demoapp/tasks.py`: + +.. literalinclude:: ../../examples/django/demoapp/tasks.py + + +.. seealso:: + + You can find the full source code for the Django example project at: + https://github.com/celery/celery/tree/3.1/examples/django/ + +Using the Django ORM/Cache as a result backend. +----------------------------------------------- + +The ``django-celery`` library defines result backends that +uses the Django ORM and Django Cache frameworks. + +To use this with your project you need to follow these four steps: + +1. Install the ``django-celery`` library: + + .. code-block:: bash + + $ pip install django-celery + +2. Add ``djcelery`` to ``INSTALLED_APPS``. + +3. Create the celery database tables. 
+ + This step will create the tables used to store results + when using the database result backend and the tables used + by the database periodic task scheduler. You can skip + this step if you don't use these. + + If you are using south_ for schema migrations, you'll want to: + + .. code-block:: bash + + $ python manage.py migrate djcelery + + For those who are not using south, a normal ``syncdb`` will work: + + .. code-block:: bash + + $ python manage.py syncdb + +4. Configure celery to use the django-celery backend. + + For the database backend you must use: + + .. code-block:: python + + app.conf.update( + CELERY_RESULT_BACKEND='djcelery.backends.database:DatabaseBackend', + ) + + For the cache backend you can use: + + .. code-block:: python + + app.conf.update( + CELERY_RESULT_BACKEND='djcelery.backends.cache:CacheBackend', + ) + + If you have connected Celery to your Django settings then you can + add this directly into your settings module (without the + ``app.conf.update`` part) + + + +.. _south: http://pypi.python.org/pypi/South/ + +.. admonition:: Relative Imports + + You have to be consistent in how you import the task module, e.g. if + you have ``project.app`` in ``INSTALLED_APPS`` then you also + need to import the tasks ``from project.app`` or else the names + of the tasks will be different. + + See :ref:`task-naming-relative-imports` + +Starting the worker process +=========================== + +In a production environment you will want to run the worker in the background +as a daemon - see :ref:`daemonizing` - but for testing and +development it is useful to be able to start a worker instance by using the +``celery worker`` manage command, much as you would use Django's runserver: + +.. code-block:: bash + + $ celery -A proj worker -l info + + +For a complete listing of the command-line options available, +use the help command: + +.. code-block:: bash + + $ celery help + +Where to go from here +===================== + +If you want to learn more you should continue to the +:ref:`Next Steps ` tutorial, and after that you +can study the :ref:`User Guide `. diff --git a/docs/django/index.rst b/docs/django/index.rst new file mode 100644 index 0000000..5c74639 --- /dev/null +++ b/docs/django/index.rst @@ -0,0 +1,13 @@ +.. _django: + +========= + Django +========= + +:Release: |version| +:Date: |today| + +.. toctree:: + :maxdepth: 2 + + first-steps-with-django diff --git a/docs/faq.rst b/docs/faq.rst new file mode 100644 index 0000000..f4c785e --- /dev/null +++ b/docs/faq.rst @@ -0,0 +1,894 @@ +.. _faq: + +============================ + Frequently Asked Questions +============================ + +.. contents:: + :local: + +.. _faq-general: + +General +======= + +.. _faq-when-to-use: + +What kinds of things should I use Celery for? +--------------------------------------------- + +**Answer:** `Queue everything and delight everyone`_ is a good article +describing why you would use a queue in a web context. + +.. _`Queue everything and delight everyone`: + http://decafbad.com/blog/2008/07/04/queue-everything-and-delight-everyone + +These are some common use cases: + +* Running something in the background. For example, to finish the web request + as soon as possible, then update the users page incrementally. + This gives the user the impression of good performance and "snappiness", even + though the real work might actually take some time. + +* Running something after the web request has finished. + +* Making sure something is done, by executing it asynchronously and using + retries. 
+ +* Scheduling periodic work. + +And to some degree: + +* Distributed computing. + +* Parallel execution. + +.. _faq-misconceptions: + +Misconceptions +============== + +.. _faq-loc: + +Does Celery really consist of 50.000 lines of code? +--------------------------------------------------- + +**Answer:** No, this and similarly large numbers have +been reported at various locations. + +The numbers as of this writing are: + + - core: 7,141 lines of code. + - tests: 14,209 lines. + - backends, contrib, compat utilities: 9,032 lines. + +Lines of code is not a useful metric, so +even if Celery did consist of 50k lines of code you would not +be able to draw any conclusions from such a number. + +Does Celery have many dependencies? +----------------------------------- + +A common criticism is that Celery uses too many dependencies. +The rationale behind such a fear is hard to imagine, especially considering +code reuse as the established way to combat complexity in modern software +development, and that the cost of adding dependencies is very low now +that package managers like pip and PyPI makes the hassle of installing +and maintaining dependencies a thing of the past. + +Celery has replaced several dependencies along the way, and +the current list of dependencies are: + +celery +~~~~~~ + +- `kombu`_ + +Kombu is part of the Celery ecosystem and is the library used +to send and receive messages. It is also the library that enables +us to support many different message brokers. It is also used by the +OpenStack project, and many others, validating the choice to separate +it from the Celery codebase. + +.. _`kombu`: http://pypi.python.org/pypi/kombu + +- `billiard`_ + +Billiard is a fork of the Python multiprocessing module containing +many performance and stability improvements. It is an eventual goal +that these improvements will be merged back into Python one day. + +It is also used for compatibility with older Python versions +that doesn't come with the multiprocessing module. + +.. _`billiard`: http://pypi.python.org/pypi/billiard + +- `pytz` + +The pytz module provides timezone definitions and related tools. + +.. _`pytz`: http://pypi.python.org/pypi/pytz + +django-celery +~~~~~~~~~~~~~ + +If you use django-celery then you don't have to install celery separately, +as it will make sure that the required version is installed. + +django-celery does not have any other dependencies. + +kombu +~~~~~ + +Kombu depends on the following packages: + +- `amqp`_ + +The underlying pure-Python amqp client implementation. AMQP being the default +broker this is a natural dependency. + +.. _`amqp`: http://pypi.python.org/pypi/amqp + +- `anyjson`_ + +anyjson is an utility library to select the best possible +JSON implementation. + +.. _`anyjson`: http://pypi.python.org/pypi/anyjson + + +.. note:: + + For compatibility reasons additional packages may be installed + if you are running on older Python versions, + for example Python 2.6 depends on the ``importlib``, + and ``ordereddict`` libraries. + + Also, to handle the dependencies for popular configuration + choices Celery defines a number of "bundle" packages, + see :ref:`bundles`. + + +.. _faq-heavyweight: + +Is Celery heavy-weight? +----------------------- + +Celery poses very little overhead both in memory footprint and +performance. + +But please note that the default configuration is not optimized for time nor +space, see the :ref:`guide-optimizing` guide for more information. + +.. _faq-serializion-is-a-choice: + +Is Celery dependent on pickle? 
+------------------------------ + +**Answer:** No. + +Celery can support any serialization scheme and has built-in support for +JSON, YAML, Pickle and msgpack. Also, as every task is associated with a +content type, you can even send one task using pickle, and another using JSON. + +The default serialization format is pickle simply because it is +convenient (it supports sending complex Python objects as task arguments). + +If you need to communicate with other languages you should change +to a serialization format that is suitable for that. + +You can set a global default serializer, the default serializer for a +particular Task, or even what serializer to use when sending a single task +instance. + +.. _faq-is-celery-for-django-only: + +Is Celery for Django only? +-------------------------- + +**Answer:** No. + +You can use Celery with any framework, web or otherwise. + +.. _faq-is-celery-for-rabbitmq-only: + +Do I have to use AMQP/RabbitMQ? +------------------------------- + +**Answer**: No. + +Although using RabbitMQ is recommended you can also use Redis. There are also +experimental transports available such as MongoDB, Beanstalk, CouchDB, or using +SQL databases. See :ref:`brokers` for more information. + +The experimental transports may have reliability problems and +limited broadcast and event functionality. +For example remote control commands only works with AMQP and Redis. + +Redis or a database won't perform as well as +an AMQP broker. If you have strict reliability requirements you are +encouraged to use RabbitMQ or another AMQP broker. Some transports also uses +polling, so they are likely to consume more resources. However, if you for +some reason are not able to use AMQP, feel free to use these alternatives. +They will probably work fine for most use cases, and note that the above +points are not specific to Celery; If using Redis/database as a queue worked +fine for you before, it probably will now. You can always upgrade later +if you need to. + +.. _faq-is-celery-multilingual: + +Is Celery multilingual? +------------------------ + +**Answer:** Yes. + +:mod:`~celery.bin.worker` is an implementation of Celery in Python. If the +language has an AMQP client, there shouldn't be much work to create a worker +in your language. A Celery worker is just a program connecting to the broker +to process messages. + +Also, there's another way to be language independent, and that is to use REST +tasks, instead of your tasks being functions, they're URLs. With this +information you can even create simple web servers that enable preloading of +code. See: :ref:`User Guide: Remote Tasks `. + +.. _faq-troubleshooting: + +Troubleshooting +=============== + +.. _faq-mysql-deadlocks: + +MySQL is throwing deadlock errors, what can I do? +------------------------------------------------- + +**Answer:** MySQL has default isolation level set to `REPEATABLE-READ`, +if you don't really need that, set it to `READ-COMMITTED`. +You can do that by adding the following to your :file:`my.cnf`:: + + [mysqld] + transaction-isolation = READ-COMMITTED + +For more information about InnoDB`s transaction model see `MySQL - The InnoDB +Transaction Model and Locking`_ in the MySQL user manual. + +(Thanks to Honza Kral and Anton Tsigularov for this solution) + +.. _`MySQL - The InnoDB Transaction Model and Locking`: http://dev.mysql.com/doc/refman/5.1/en/innodb-transaction-model.html + +.. 
_faq-worker-hanging: + +The worker is not doing anything, just hanging +---------------------------------------------- + +**Answer:** See `MySQL is throwing deadlock errors, what can I do?`_. + or `Why is Task.delay/apply\* just hanging?`. + +.. _faq-results-unreliable: + +Task results aren't reliably returning +-------------------------------------- + +**Answer:** If you're using the database backend for results, and in particular +using MySQL, see `MySQL is throwing deadlock errors, what can I do?`_. + +.. _faq-publish-hanging: + +Why is Task.delay/apply\*/the worker just hanging? +-------------------------------------------------- + +**Answer:** There is a bug in some AMQP clients that will make it hang if +it's not able to authenticate the current user, the password doesn't match or +the user does not have access to the virtual host specified. Be sure to check +your broker logs (for RabbitMQ that is :file:`/var/log/rabbitmq/rabbit.log` on +most systems), it usually contains a message describing the reason. + +.. _faq-worker-on-freebsd: + +Does it work on FreeBSD? +------------------------ + +**Answer:** The prefork pool requires a working POSIX semaphore +implementation which isn't enabled in FreeBSD by default. You have to enable +POSIX semaphores in the kernel and manually recompile multiprocessing. + +Luckily, Viktor Petersson has written a tutorial to get you started with +Celery on FreeBSD here: +http://www.playingwithwire.com/2009/10/how-to-get-celeryd-to-work-on-freebsd/ + +.. _faq-duplicate-key-errors: + +I'm having `IntegrityError: Duplicate Key` errors. Why? +--------------------------------------------------------- + +**Answer:** See `MySQL is throwing deadlock errors, what can I do?`_. +Thanks to howsthedotcom. + +.. _faq-worker-stops-processing: + +Why aren't my tasks processed? +------------------------------ + +**Answer:** With RabbitMQ you can see how many consumers are currently +receiving tasks by running the following command: + +.. code-block:: bash + + $ rabbitmqctl list_queues -p name messages consumers + Listing queues ... + celery 2891 2 + +This shows that there's 2891 messages waiting to be processed in the task +queue, and there are two consumers processing them. + +One reason that the queue is never emptied could be that you have a stale +worker process taking the messages hostage. This could happen if the worker +wasn't properly shut down. + +When a message is received by a worker the broker waits for it to be +acknowledged before marking the message as processed. The broker will not +re-send that message to another consumer until the consumer is shut down +properly. + +If you hit this problem you have to kill all workers manually and restart +them:: + + ps auxww | grep celeryd | awk '{print $2}' | xargs kill + +You might have to wait a while until all workers have finished the work they're +doing. If it's still hanging after a long time you can kill them by force +with:: + + ps auxww | grep celeryd | awk '{print $2}' | xargs kill -9 + +.. _faq-task-does-not-run: + +Why won't my Task run? +---------------------- + +**Answer:** There might be syntax errors preventing the tasks module being imported. + +You can find out if Celery is able to run the task by executing the +task manually: + + >>> from myapp.tasks import MyPeriodicTask + >>> MyPeriodicTask.delay() + +Watch the workers log file to see if it's able to find the task, or if some +other error is happening. + +.. _faq-periodic-task-does-not-run: + +Why won't my periodic task run? 
+------------------------------- + +**Answer:** See `Why won't my Task run?`_. + +.. _faq-purge-the-queue: + +How do I purge all waiting tasks? +--------------------------------- + +**Answer:** You can use the ``celery purge`` command to purge +all configured task queues: + +.. code-block:: bash + + $ celery -A proj purge + +or programatically: + +.. code-block:: python + + >>> from proj.celery import app + >>> app.control.purge() + 1753 + +If you only want to purge messages from a specific queue +you have to use the AMQP API or the :program:`celery amqp` utility: + +.. code-block:: bash + + $ celery -A proj amqp queue.purge + +The number 1753 is the number of messages deleted. + +You can also start :mod:`~celery.bin.worker` with the +:option:`--purge` argument, to purge messages when the worker starts. + +.. _faq-messages-left-after-purge: + +I've purged messages, but there are still messages left in the queue? +--------------------------------------------------------------------- + +**Answer:** Tasks are acknowledged (removed from the queue) as soon +as they are actually executed. After the worker has received a task, it will +take some time until it is actually executed, especially if there are a lot +of tasks already waiting for execution. Messages that are not acknowledged are +held on to by the worker until it closes the connection to the broker (AMQP +server). When that connection is closed (e.g. because the worker was stopped) +the tasks will be re-sent by the broker to the next available worker (or the +same worker when it has been restarted), so to properly purge the queue of +waiting tasks you have to stop all the workers, and then purge the tasks +using :func:`celery.control.purge`. + +.. _faq-results: + +Results +======= + +.. _faq-get-result-by-task-id: + +How do I get the result of a task if I have the ID that points there? +---------------------------------------------------------------------- + +**Answer**: Use `task.AsyncResult`:: + + >>> result = my_task.AsyncResult(task_id) + >>> result.get() + +This will give you a :class:`~celery.result.AsyncResult` instance +using the tasks current result backend. + +If you need to specify a custom result backend, or you want to use +the current application's default backend you can use +:class:`@Celery.AsyncResult`: + + >>> result = app.AsyncResult(task_id) + >>> result.get() + +.. _faq-security: + +Security +======== + +Isn't using `pickle` a security concern? +---------------------------------------- + +**Answer**: Yes, indeed it is. + +You are right to have a security concern, as this can indeed be a real issue. +It is essential that you protect against unauthorized +access to your broker, databases and other services transmitting pickled +data. + +For the task messages you can set the :setting:`CELERY_TASK_SERIALIZER` +setting to "json" or "yaml" instead of pickle. There is +currently no alternative solution for task results (but writing a +custom result backend using JSON is a simple task) + +Note that this is not just something you should be aware of with Celery, for +example also Django uses pickle for its cache client. + +Can messages be encrypted? +-------------------------- + +**Answer**: Some AMQP brokers supports using SSL (including RabbitMQ). +You can enable this using the :setting:`BROKER_USE_SSL` setting. + +It is also possible to add additional encryption and security to messages, +if you have a need for this then you should contact the :ref:`mailing-list`. + +Is it safe to run :program:`celery worker` as root? 
+--------------------------------------------------- + +**Answer**: No! + +We're not currently aware of any security issues, but it would +be incredibly naive to assume that they don't exist, so running +the Celery services (:program:`celery worker`, :program:`celery beat`, +:program:`celeryev`, etc) as an unprivileged user is recommended. + +.. _faq-brokers: + +Brokers +======= + +Why is RabbitMQ crashing? +------------------------- + +**Answer:** RabbitMQ will crash if it runs out of memory. This will be fixed in a +future release of RabbitMQ. please refer to the RabbitMQ FAQ: +http://www.rabbitmq.com/faq.html#node-runs-out-of-memory + +.. note:: + + This is no longer the case, RabbitMQ versions 2.0 and above + includes a new persister, that is tolerant to out of memory + errors. RabbitMQ 2.1 or higher is recommended for Celery. + + If you're still running an older version of RabbitMQ and experience + crashes, then please upgrade! + +Misconfiguration of Celery can eventually lead to a crash +on older version of RabbitMQ. Even if it doesn't crash, this +can still consume a lot of resources, so it is very +important that you are aware of the common pitfalls. + +* Events. + +Running :mod:`~celery.bin.worker` with the :option:`-E`/:option:`--events` +option will send messages for events happening inside of the worker. + +Events should only be enabled if you have an active monitor consuming them, +or if you purge the event queue periodically. + +* AMQP backend results. + +When running with the AMQP result backend, every task result will be sent +as a message. If you don't collect these results, they will build up and +RabbitMQ will eventually run out of memory. + +Results expire after 1 day by default. It may be a good idea +to lower this value by configuring the :setting:`CELERY_TASK_RESULT_EXPIRES` +setting. + +If you don't use the results for a task, make sure you set the +`ignore_result` option: + +.. code-block python + + @app.task(ignore_result=True) + def mytask(): + … + + class MyTask(Task): + ignore_result = True + +.. _faq-use-celery-with-stomp: + +Can I use Celery with ActiveMQ/STOMP? +------------------------------------- + +**Answer**: No. It used to be supported by Carrot, +but is not currently supported in Kombu. + +.. _faq-non-amqp-missing-features: + +What features are not supported when not using an AMQP broker? +-------------------------------------------------------------- + +This is an incomplete list of features not available when +using the virtual transports: + + * Remote control commands (supported only by Redis). + + * Monitoring with events may not work in all virtual transports. + + * The `header` and `fanout` exchange types + (`fanout` is supported by Redis). + +.. _faq-tasks: + +Tasks +===== + +.. _faq-tasks-connection-reuse: + +How can I reuse the same connection when calling tasks? +------------------------------------------------------- + +**Answer**: See the :setting:`BROKER_POOL_LIMIT` setting. +The connection pool is enabled by default since version 2.5. + +.. _faq-sudo-subprocess: + +Sudo in a :mod:`subprocess` returns :const:`None` +------------------------------------------------- + +There is a sudo configuration option that makes it illegal for process +without a tty to run sudo:: + + Defaults requiretty + +If you have this configuration in your :file:`/etc/sudoers` file then +tasks will not be able to call sudo when the worker is running as a daemon. +If you want to enable that, then you need to remove the line from sudoers. 
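As a rough sketch only (``celeryuser`` is a placeholder for whatever account
your worker runs under), you could check whether the option is set and then
either comment it out or exempt just that account:

.. code-block:: bash

    # Check whether requiretty is enabled (also check /etc/sudoers.d/):
    $ sudo grep -R requiretty /etc/sudoers /etc/sudoers.d/

    # Always edit sudoers with visudo; then either comment out
    #   Defaults requiretty
    # or exempt only the worker's account with
    #   Defaults:celeryuser !requiretty
    $ sudo visudo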
+ +See: http://timelordz.com/wiki/Apache_Sudo_Commands + +.. _faq-deletes-unknown-tasks: + +Why do workers delete tasks from the queue if they are unable to process them? +------------------------------------------------------------------------------ +**Answer**: + +The worker rejects unknown tasks, messages with encoding errors and messages +that doesn't contain the proper fields (as per the task message protocol). + +If it did not reject them they could be redelivered again and again, +causing a loop. + +Recent versions of RabbitMQ has the ability to configure a dead-letter +queue for exchange, so that rejected messages is moved there. + +.. _faq-execute-task-by-name: + +Can I call a task by name? +----------------------------- + +**Answer**: Yes. Use :func:`celery.execute.send_task`. +You can also call a task by name from any language +that has an AMQP client. + + >>> from celery.execute import send_task + >>> send_task("tasks.add", args=[2, 2], kwargs={}) + + +.. _faq-get-current-task-id: + +How can I get the task id of the current task? +---------------------------------------------- + +**Answer**: The current id and more is available in the task request:: + + @app.task(bind=True) + def mytask(self): + cache.set(self.request.id, "Running") + +For more information see :ref:`task-request-info`. + +.. _faq-custom-task-ids: + +Can I specify a custom task_id? +------------------------------- + +**Answer**: Yes. Use the `task_id` argument to :meth:`Task.apply_async`:: + + >>> task.apply_async(args, kwargs, task_id='…') + + +Can I use decorators with tasks? +-------------------------------- + +**Answer**: Yes. But please see note in the sidebar at :ref:`task-basics`. + +.. _faq-natural-task-ids: + +Can I use natural task ids? +--------------------------- + +**Answer**: Yes, but make sure it is unique, as the behavior +for two tasks existing with the same id is undefined. + +The world will probably not explode, but at the worst +they can overwrite each others results. + +.. _faq-task-callbacks: + +How can I run a task once another task has finished? +---------------------------------------------------- + +**Answer**: You can safely launch a task inside a task. +Also, a common pattern is to add callbacks to tasks: + +.. code-block:: python + + from celery.utils.log import get_task_logger + + logger = get_task_logger(__name__) + + @app.task + def add(x, y): + return x + y + + @app.task(ignore_result=True) + def log_result(result): + logger.info("log_result got: %r", result) + +Invocation:: + + >>> (add.s(2, 2) | log_result.s()).delay() + +See :doc:`userguide/canvas` for more information. + +.. _faq-cancel-task: + +Can I cancel the execution of a task? +------------------------------------- +**Answer**: Yes. Use `result.revoke`:: + + >>> result = add.apply_async(args=[2, 2], countdown=120) + >>> result.revoke() + +or if you only have the task id:: + + >>> from proj.celery import app + >>> app.control.revoke(task_id) + +.. _faq-node-not-receiving-broadcast-commands: + +Why aren't my remote control commands received by all workers? +-------------------------------------------------------------- + +**Answer**: To receive broadcast remote control commands, every worker node +uses its host name to create a unique queue name to listen to, +so if you have more than one worker with the same host name, the +control commands will be received in round-robin between them. 
+ +To work around this you can explicitly set the nodename for every worker +using the :option:`-n` argument to :mod:`~celery.bin.worker`: + +.. code-block:: bash + + $ celery -A proj worker -n worker1@%h + $ celery -A proj worker -n worker2@%h + +where ``%h`` is automatically expanded into the current hostname. + +.. _faq-task-routing: + +Can I send some tasks to only some servers? +-------------------------------------------- + +**Answer:** Yes. You can route tasks to an arbitrary server using AMQP, +and a worker can bind to as many queues as it wants. + +See :doc:`userguide/routing` for more information. + +.. _faq-change-periodic-task-interval-at-runtime: + +Can I change the interval of a periodic task at runtime? +-------------------------------------------------------- + +**Answer**: Yes. You can use the Django database scheduler, or you can +create a new schedule subclass and override +:meth:`~celery.schedules.schedule.is_due`: + +.. code-block:: python + + from celery.schedules import schedule + + + class my_schedule(schedule): + + def is_due(self, last_run_at): + return … + +.. _faq-task-priorities: + +Does celery support task priorities? +------------------------------------ + +**Answer**: No. In theory, yes, as AMQP supports priorities. However +RabbitMQ doesn't implement them yet. + +The usual way to prioritize work in Celery, is to route high priority tasks +to different servers. In the real world this may actually work better than per message +priorities. You can use this in combination with rate limiting to achieve a +highly responsive system. + +.. _faq-acks_late-vs-retry: + +Should I use retry or acks_late? +-------------------------------- + +**Answer**: Depends. It's not necessarily one or the other, you may want +to use both. + +`Task.retry` is used to retry tasks, notably for expected errors that +is catchable with the `try:` block. The AMQP transaction is not used +for these errors: **if the task raises an exception it is still acknowledged!**. + +The `acks_late` setting would be used when you need the task to be +executed again if the worker (for some reason) crashes mid-execution. +It's important to note that the worker is not known to crash, and if +it does it is usually an unrecoverable error that requires human +intervention (bug in the worker, or task code). + +In an ideal world you could safely retry any task that has failed, but +this is rarely the case. Imagine the following task: + +.. code-block:: python + + @app.task + def process_upload(filename, tmpfile): + # Increment a file count stored in a database + increment_file_counter() + add_file_metadata_to_db(filename, tmpfile) + copy_file_to_destination(filename, tmpfile) + +If this crashed in the middle of copying the file to its destination +the world would contain incomplete state. This is not a critical +scenario of course, but you can probably imagine something far more +sinister. So for ease of programming we have less reliability; +It's a good default, users who require it and know what they +are doing can still enable acks_late (and in the future hopefully +use manual acknowledgement) + +In addition `Task.retry` has features not available in AMQP +transactions: delay between retries, max retries, etc. + +So use retry for Python errors, and if your task is idempotent +combine that with `acks_late` if that level of reliability +is required. + +.. _faq-schedule-at-specific-time: + +Can I schedule tasks to execute at a specific time? +--------------------------------------------------- + +.. 
module:: celery.task.base + +**Answer**: Yes. You can use the `eta` argument of :meth:`Task.apply_async`. + +Or to schedule a periodic task at a specific time, use the +:class:`celery.schedules.crontab` schedule behavior: + + +.. code-block:: python + + from celery.schedules import crontab + from celery.task import periodic_task + + @periodic_task(run_every=crontab(hour=7, minute=30, day_of_week="mon")) + def every_monday_morning(): + print("This is run every Monday morning at 7:30") + +.. _faq-safe-worker-shutdown: + +How can I safely shut down the worker? +-------------------------------------- + +**Answer**: Use the :sig:`TERM` signal, and the worker will finish all currently +executing jobs and shut down as soon as possible. No tasks should be lost. + +You should never stop :mod:`~celery.bin.worker` with the :sig:`KILL` signal +(:option:`-9`), unless you've tried :sig:`TERM` a few times and waited a few +minutes to let it get a chance to shut down. As if you do tasks may be +terminated mid-execution, and they will not be re-run unless you have the +`acks_late` option set (`Task.acks_late` / :setting:`CELERY_ACKS_LATE`). + +.. seealso:: + + :ref:`worker-stopping` + +.. _faq-daemonizing: + +How do I run the worker in the background on [platform]? +-------------------------------------------------------- +**Answer**: Please see :ref:`daemonizing`. + +.. _faq-django: + +Django +====== + +.. _faq-django-database-tables: + +What purpose does the database tables created by django-celery have? +-------------------------------------------------------------------- + +Several database tables are created by default, these relate to + +* Monitoring + + When you use the django-admin monitor, the cluster state is written + to the ``TaskState`` and ``WorkerState`` models. + +* Periodic tasks + + When the database-backed schedule is used the periodic task + schedule is taken from the ``PeriodicTask`` model, there are + also several other helper tables (``IntervalSchedule``, + ``CrontabSchedule``, ``PeriodicTasks``). + +* Task results + + The database result backend is enabled by default when using django-celery + (this is for historical reasons, and thus for backward compatibility). + + The results are stored in the ``TaskMeta`` and ``TaskSetMeta`` models. + *these tables are not created if another result backend is configured*. + +.. _faq-windows: + +Windows +======= + +.. _faq-windows-worker-embedded-beat: + +The `-B` / `--beat` option to worker doesn't work? +---------------------------------------------------------------- +**Answer**: That's right. Run `celery beat` and `celery worker` as separate +services instead. diff --git a/docs/getting-started/brokers/beanstalk.rst b/docs/getting-started/brokers/beanstalk.rst new file mode 100644 index 0000000..4854310 --- /dev/null +++ b/docs/getting-started/brokers/beanstalk.rst @@ -0,0 +1,63 @@ +.. _broker-beanstalk: + +================= + Using Beanstalk +================= + +.. _broker-beanstalk-installation: + +.. admonition:: Out of order + + The Beanstalk transport is currently not working well. + + We are interested in contributions and donations that can go towards + improving this situation. + + + +Installation +============ + +For the Beanstalk support you have to install additional dependencies. +You can install both Celery and these dependencies in one go using +the ``celery[beanstalk]`` :ref:`bundle `: + +.. code-block:: bash + + $ pip install -U celery[beanstalk] + +.. 
_broker-beanstalk-configuration: + +Configuration +============= + +Configuration is easy, set the transport, and configure the location of +your Beanstalk database:: + + BROKER_URL = 'beanstalk://localhost:11300' + +Where the URL is in the format of:: + + beanstalk://hostname:port + +The host name will default to ``localhost`` and the port to 11300, +and so they are optional. + +.. _beanstalk-results-configuration: + +Results +------- + +Using Beanstalk to store task state and results is currently **not supported**. + +.. _broker-beanstalk-limitations: + +Limitations +=========== + +The Beanstalk message transport does not currently support: + + * Remote control commands (:program:`celery control`, + :program:`celery inspect`, broadcast) + * Authentication + diff --git a/docs/getting-started/brokers/couchdb.rst b/docs/getting-started/brokers/couchdb.rst new file mode 100644 index 0000000..d731ef0 --- /dev/null +++ b/docs/getting-started/brokers/couchdb.rst @@ -0,0 +1,60 @@ +.. _broker-couchdb: + +=============== + Using CouchDB +=============== + +.. admonition:: Experimental Status + + The CouchDB transport is in need of improvements in many areas and there + are several open bugs. Unfortunately we don't have the resources or funds + required to improve the situation, so we're looking for contributors + and partners willing to help. + +.. _broker-couchdb-installation: + +Installation +============ + +For the CouchDB support you have to install additional dependencies. +You can install both Celery and these dependencies in one go using +the ``celery[couchdb]`` :ref:`bundle `: + +.. code-block:: bash + + $ pip install -U celery[couchdb] + +.. _broker-couchdb-configuration: + +Configuration +============= + +Configuration is easy, set the transport, and configure the location of +your CouchDB database:: + + BROKER_URL = 'couchdb://localhost:5984/database_name' + +Where the URL is in the format of:: + + couchdb://userid:password@hostname:port/database_name + +The host name will default to ``localhost`` and the port to 5984, +and so they are optional. userid and password are also optional, +but needed if your CouchDB server requires authentication. + +.. _couchdb-results-configuration: + +Results +------- + +Storing task state and results in CouchDB is currently **not supported**. + +.. _broker-couchdb-limitations: + +Limitations +=========== + +The CouchDB message transport does not currently support: + + * Remote control commands (:program:`celery inspect`, + :program:`celery control`, broadcast) diff --git a/docs/getting-started/brokers/django.rst b/docs/getting-started/brokers/django.rst new file mode 100644 index 0000000..d4358d7 --- /dev/null +++ b/docs/getting-started/brokers/django.rst @@ -0,0 +1,51 @@ +.. _broker-django: + +=========================== + Using the Django Database +=========================== + +.. admonition:: Experimental Status + + The Django database transport is in need of improvements in many areas + and there are several open bugs. Unfortunately we don't have the resources or funds + required to improve the situation, so we're looking for contributors + and partners willing to help. + +.. _broker-django-installation: + +Installation +============ + +.. _broker-django-configuration: + +Configuration +============= + +The database transport uses the Django `DATABASE_*` settings for database +configuration values. + +#. Set your broker transport:: + + BROKER_URL = 'django://' + +#. 
Add :mod:`kombu.transport.django` to `INSTALLED_APPS`:: + + INSTALLED_APPS = ('kombu.transport.django', ) + +#. Sync your database schema: + +.. code-block:: bash + + $ python manage.py syncdb + +.. _broker-django-limitations: + +Limitations +=========== + +The Django database transport does not currently support: + + * Remote control commands (:program:`celery events` command, broadcast) + * Events, including the Django Admin monitor. + * Using more than a few workers (can lead to messages being executed + multiple times). diff --git a/docs/getting-started/brokers/index.rst b/docs/getting-started/brokers/index.rst new file mode 100644 index 0000000..ee59557 --- /dev/null +++ b/docs/getting-started/brokers/index.rst @@ -0,0 +1,79 @@ +.. _brokers: + +===================== + Brokers +===================== + +:Release: |version| +:Date: |today| + +Celery supports several message transport alternatives. + +.. _broker_toc: + +Broker Instructions +=================== + +.. toctree:: + :maxdepth: 1 + + rabbitmq + redis + +Experimental Transports +======================= + +.. toctree:: + :maxdepth: 1 + + sqlalchemy + django + mongodb + sqs + couchdb + beanstalk + ironmq + +.. _broker-overview: + +Broker Overview +=============== + +This is comparison table of the different transports supports, +more information can be found in the documentation for each +individual transport (see :ref:`broker_toc`). + ++---------------+--------------+----------------+--------------------+ +| **Name** | **Status** | **Monitoring** | **Remote Control** | ++---------------+--------------+----------------+--------------------+ +| *RabbitMQ* | Stable | Yes | Yes | ++---------------+--------------+----------------+--------------------+ +| *Redis* | Stable | Yes | Yes | ++---------------+--------------+----------------+--------------------+ +| *Mongo DB* | Experimental | Yes | Yes | ++---------------+--------------+----------------+--------------------+ +| *Beanstalk* | Experimental | No | No | ++---------------+--------------+----------------+--------------------+ +| *Amazon SQS* | Experimental | No | No | ++---------------+--------------+----------------+--------------------+ +| *Couch DB* | Experimental | No | No | ++---------------+--------------+----------------+--------------------+ +| *Zookeeper* | Experimental | No | No | ++---------------+--------------+----------------+--------------------+ +| *Django DB* | Experimental | No | No | ++---------------+--------------+----------------+--------------------+ +| *SQLAlchemy* | Experimental | No | No | ++---------------+--------------+----------------+--------------------+ +| *Iron MQ* | 3rd party | No | No | ++---------------+--------------+----------------+--------------------+ + +Experimental brokers may be functional but they do not have +dedicated maintainers. + +Missing monitor support means that the transport does not +implement events, and as such Flower, `celery events`, `celerymon` +and other event-based monitoring tools will not work. + +Remote control means the ability to inspect and manage workers +at runtime using the `celery inspect` and `celery control` commands +(and other tools using the remote control API). diff --git a/docs/getting-started/brokers/ironmq.rst b/docs/getting-started/brokers/ironmq.rst new file mode 100644 index 0000000..49ddcf4 --- /dev/null +++ b/docs/getting-started/brokers/ironmq.rst @@ -0,0 +1,70 @@ +.. _broker-ironmq: + +================== + Using IronMQ +================== + +.. 
_broker-ironmq-installation:

Installation
============

For IronMQ support, you'll need the `iron_celery <http://github.com/iron-io/iron_celery>`_ library:

.. code-block:: bash

    $ pip install iron_celery

You will also need an `Iron.io account <http://www.iron.io>`_. Sign up for free at `iron.io <http://www.iron.io>`_.

.. _broker-ironmq-configuration:

Configuration
=============

First, you'll need to import the iron_celery library right after you import Celery, for example::

    from celery import Celery
    import iron_celery

    app = Celery('mytasks', broker='ironmq://', backend='ironcache://')

You have to specify IronMQ in the broker URL::

    BROKER_URL = 'ironmq://ABCDEFGHIJKLMNOPQRST:ZYXK7NiynGlTogH8Nj+P9nlE73sq3@'

where the URL format is::

    ironmq://project_id:token@

you must *remember to include the "@" at the end*.

The login credentials can also be set using the environment variables
:envvar:`IRON_TOKEN` and :envvar:`IRON_PROJECT_ID`, which are set automatically
if you use the IronMQ Heroku add-on. In that case the broker URL need only be::

    ironmq://

Clouds
------

The default cloud/region is ``AWS us-east-1``. You can choose the IronMQ
Rackspace (ORD) cloud by changing the URL to::

    ironmq://project_id:token@mq-rackspace-ord.iron.io

Results
=======

You can store results in IronCache with the same Iron.io credentials; just set
the results URL with the same syntax as the broker URL, but change the scheme
to ``ironcache``::

    ironcache://project_id:token@

This defaults to a cache named "Celery"; to use a different cache, append its
name to the URL::

    ironcache://project_id:token@/awesomecache

More Information
================

You can find more information in the `iron_celery README <http://github.com/iron-io/iron_celery>`_.
diff --git a/docs/getting-started/brokers/mongodb.rst b/docs/getting-started/brokers/mongodb.rst new file mode 100644 index 0000000..3947368 --- /dev/null +++ b/docs/getting-started/brokers/mongodb.rst @@ -0,0 +1,51 @@
.. _broker-mongodb:

===============
 Using MongoDB
===============

.. admonition:: Experimental Status

    The MongoDB transport is in need of improvements in many areas and there
    are several open bugs. Unfortunately we don't have the resources or funds
    required to improve the situation, so we're looking for contributors
    and partners willing to help.

.. _broker-mongodb-installation:

Installation
============

For the MongoDB support you have to install additional dependencies.
You can install both Celery and these dependencies in one go using
the ``celery[mongodb]`` :ref:`bundle `:

.. code-block:: bash

    $ pip install -U celery[mongodb]

.. _broker-mongodb-configuration:

Configuration
=============

Configuration is easy: set the transport, and configure the location of
your MongoDB database::

    BROKER_URL = 'mongodb://localhost:27017/database_name'

Where the URL is in the format of::

    mongodb://userid:password@hostname:port/database_name

The host name will default to ``localhost`` and the port to 27017,
and so they are optional. The userid and password are also optional,
but needed if your MongoDB server requires authentication.

.. _mongodb-results-configuration:

Results
-------

If you also want to store the state and return values of tasks in MongoDB,
you should see :ref:`conf-mongodb-result-backend`.
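As a minimal sketch (the database and collection names below are only
examples, and the authoritative option reference is
:ref:`conf-mongodb-result-backend`)::

    CELERY_RESULT_BACKEND = 'mongodb://localhost:27017/'
    CELERY_MONGODB_BACKEND_SETTINGS = {
        'database': 'celery',              # example database name
        'taskmeta_collection': 'taskmeta', # example collection name
    }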
diff --git a/docs/getting-started/brokers/rabbitmq.rst b/docs/getting-started/brokers/rabbitmq.rst new file mode 100644 index 0000000..1df1656 --- /dev/null +++ b/docs/getting-started/brokers/rabbitmq.rst @@ -0,0 +1,157 @@ +.. _broker-rabbitmq: + +================ + Using RabbitMQ +================ + +.. contents:: + :local: + +Installation & Configuration +============================ + +RabbitMQ is the default broker so it does not require any additional +dependencies or initial configuration, other than the URL location of +the broker instance you want to use:: + + >>> BROKER_URL = 'amqp://guest:guest@localhost:5672//' + +For a description of broker URLs and a full list of the +various broker configuration options available to Celery, +see :ref:`conf-broker-settings`. + +.. _installing-rabbitmq: + +Installing the RabbitMQ Server +============================== + +See `Installing RabbitMQ`_ over at RabbitMQ's website. For Mac OS X +see `Installing RabbitMQ on OS X`_. + +.. _`Installing RabbitMQ`: http://www.rabbitmq.com/install.html + +.. note:: + + If you're getting `nodedown` errors after installing and using + :program:`rabbitmqctl` then this blog post can help you identify + the source of the problem: + + http://somic.org/2009/02/19/on-rabbitmqctl-and-badrpcnodedown/ + +.. _rabbitmq-configuration: + +Setting up RabbitMQ +------------------- + +To use celery we need to create a RabbitMQ user, a virtual host and +allow that user access to that virtual host: + +.. code-block:: bash + + $ sudo rabbitmqctl add_user myuser mypassword + +.. code-block:: bash + + $ sudo rabbitmqctl add_vhost myvhost + +.. code-block:: bash + + $ sudo rabbitmqctl set_permissions -p myvhost myuser ".*" ".*" ".*" + +See the RabbitMQ `Admin Guide`_ for more information about `access control`_. + +.. _`Admin Guide`: http://www.rabbitmq.com/admin-guide.html + +.. _`access control`: http://www.rabbitmq.com/admin-guide.html#access-control + +.. _rabbitmq-osx-installation: + +Installing RabbitMQ on OS X +--------------------------- + +The easiest way to install RabbitMQ on OS X is using `Homebrew`_ the new and +shiny package management system for OS X. + +First, install homebrew using the one-line command provided by the `Homebrew +documentation`_: + +.. code-block:: bash + + ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)" + +Finally, we can install rabbitmq using :program:`brew`: + +.. code-block:: bash + + $ brew install rabbitmq + +.. _`Homebrew`: http://github.com/mxcl/homebrew/ +.. _`Homebrew documentation`: https://github.com/Homebrew/homebrew/wiki/Installation + +.. _rabbitmq-osx-system-hostname: + +Configuring the system host name +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you're using a DHCP server that is giving you a random host name, you need +to permanently configure the host name. This is because RabbitMQ uses the host name +to communicate with nodes. + +Use the :program:`scutil` command to permanently set your host name: + +.. code-block:: bash + + $ sudo scutil --set HostName myhost.local + +Then add that host name to :file:`/etc/hosts` so it's possible to resolve it +back into an IP address:: + + 127.0.0.1 localhost myhost myhost.local + +If you start the rabbitmq server, your rabbit node should now be `rabbit@myhost`, +as verified by :program:`rabbitmqctl`: + +.. code-block:: bash + + $ sudo rabbitmqctl status + Status of node rabbit@myhost ... 
+ [{running_applications,[{rabbit,"RabbitMQ","1.7.1"}, + {mnesia,"MNESIA CXC 138 12","4.4.12"}, + {os_mon,"CPO CXC 138 46","2.2.4"}, + {sasl,"SASL CXC 138 11","2.1.8"}, + {stdlib,"ERTS CXC 138 10","1.16.4"}, + {kernel,"ERTS CXC 138 10","2.13.4"}]}, + {nodes,[rabbit@myhost]}, + {running_nodes,[rabbit@myhost]}] + ...done. + +This is especially important if your DHCP server gives you a host name +starting with an IP address, (e.g. `23.10.112.31.comcast.net`), because +then RabbitMQ will try to use `rabbit@23`, which is an illegal host name. + +.. _rabbitmq-osx-start-stop: + +Starting/Stopping the RabbitMQ server +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To start the server: + +.. code-block:: bash + + $ sudo rabbitmq-server + +you can also run it in the background by adding the :option:`-detached` option +(note: only one dash): + +.. code-block:: bash + + $ sudo rabbitmq-server -detached + +Never use :program:`kill` to stop the RabbitMQ server, but rather use the +:program:`rabbitmqctl` command: + +.. code-block:: bash + + $ sudo rabbitmqctl stop + +When the server is running, you can continue reading `Setting up RabbitMQ`_. diff --git a/docs/getting-started/brokers/redis.rst b/docs/getting-started/brokers/redis.rst new file mode 100644 index 0000000..6a1d6e3 --- /dev/null +++ b/docs/getting-started/brokers/redis.rst @@ -0,0 +1,140 @@ +.. _broker-redis: + +============= + Using Redis +============= + +.. _broker-redis-installation: + +Installation +============ + +For the Redis support you have to install additional dependencies. +You can install both Celery and these dependencies in one go using +the ``celery[redis]`` :ref:`bundle `: + +.. code-block:: bash + + $ pip install -U celery[redis] + +.. _broker-redis-configuration: + +Configuration +============= + +Configuration is easy, just configure the location of +your Redis database:: + + BROKER_URL = 'redis://localhost:6379/0' + +Where the URL is in the format of:: + + redis://:password@hostname:port/db_number + +all fields after the scheme are optional, and will default to localhost on port 6379, +using database 0. + +.. _redis-visibility_timeout: + +Visibility Timeout +------------------ + +The visibility timeout defines the number of seconds to wait +for the worker to acknowledge the task before the message is redelivered +to another worker. Be sure to see :ref:`redis-caveats` below. + +This option is set via the :setting:`BROKER_TRANSPORT_OPTIONS` setting:: + + BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 3600} # 1 hour. + +The default visibility timeout for Redis is 1 hour. + +.. _redis-results-configuration: + +Results +------- + +If you also want to store the state and return values of tasks in Redis, +you should configure these settings:: + + CELERY_RESULT_BACKEND = 'redis://localhost:6379/0' + +For a complete list of options supported by the Redis result backend, see +:ref:`conf-redis-result-backend` + +.. _redis-caveats: + +Caveats +======= + +.. _redis-caveat-fanout-prefix: + +- Broadcast messages will be seen by all virtual hosts by default. + + You have to set a transport option to prefix the messages so that + they will only be received by the active virtual host:: + + BROKER_TRANSPORT_OPTIONS = {'fanout_prefix': True} + + Note that you will not be able to communicate with workers running older + versions or workers that does not have this setting enabled. + + This setting will be the default in the future, so better to migrate + sooner rather than later. + +.. 
_redis-caveat-fanout-patterns: + +- Workers will receive all task related events by default. + + To avoid this you must set the ``fanout_patterns`` fanout option so that + the workers may only subscribe to worker related events:: + + BROKER_TRANSPORT_OPTIONS = {'fanout_patterns': True} + + Note that this change is backward incompatible so all workers in the + cluster must have this option enabled, or else they will not be able to + communicate. + + This option will be enabled by default in the future. + +- If a task is not acknowledged within the :ref:`redis-visibility_timeout` + the task will be redelivered to another worker and executed. + + This causes problems with ETA/countdown/retry tasks where the + time to execute exceeds the visibility timeout; in fact if that + happens it will be executed again, and again in a loop. + + So you have to increase the visibility timeout to match + the time of the longest ETA you are planning to use. + + Note that Celery will redeliver messages at worker shutdown, + so having a long visibility timeout will only delay the redelivery + of 'lost' tasks in the event of a power failure or forcefully terminated + workers. + + Periodic tasks will not be affected by the visibility timeout, + as this is a concept separate from ETA/countdown. + + You can increase this timeout by configuring a transport option + with the same name:: + + BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 43200} + + The value must be an int describing the number of seconds. + + +- Monitoring events (as used by flower and other tools) are global + and is not affected by the virtual host setting. + + This is caused by a limitation in Redis. The Redis PUB/SUB channels + are global and not affected by the database number. + +- Redis may evict keys from the database in some situations + + If you experience an error like:: + + InconsistencyError, Probably the key ('_kombu.binding.celery') has been + removed from the Redis database. + + you may want to configure the redis-server to not evict keys by setting + the ``timeout`` parameter to 0. diff --git a/docs/getting-started/brokers/sqlalchemy.rst b/docs/getting-started/brokers/sqlalchemy.rst new file mode 100644 index 0000000..0f8cb7b --- /dev/null +++ b/docs/getting-started/brokers/sqlalchemy.rst @@ -0,0 +1,75 @@ +.. _broker-sqlalchemy: + +================== + Using SQLAlchemy +================== + +.. admonition:: Experimental Status + + The SQLAlchemy transport is unstable in many areas and there are + several issues open. Unfortunately we don't have the resources or funds + required to improve the situation, so we're looking for contributors + and partners willing to help. + +.. _broker-sqlalchemy-installation: + +Installation +============ + +.. _broker-sqlalchemy-configuration: + +Configuration +============= + +Celery needs to know the location of your database, which should be the usual +SQLAlchemy connection string, but with 'sqla+' prepended to it:: + + BROKER_URL = 'sqla+sqlite:///celerydb.sqlite' + +This transport uses only the :setting:`BROKER_URL` setting, which have to be +an SQLAlchemy database URI. + + +Please see `SQLAlchemy: Supported Databases`_ for a table of supported databases. + +Here's a list of examples using a selection of other `SQLAlchemy Connection String`_'s: + +.. 
code-block:: python + + # sqlite (filename) + BROKER_URL = 'sqla+sqlite:///celerydb.sqlite' + + # mysql + BROKER_URL = 'sqla+mysql://scott:tiger@localhost/foo' + + # postgresql + BROKER_URL = 'sqla+postgresql://scott:tiger@localhost/mydatabase' + + # oracle + BROKER_URL = 'sqla+oracle://scott:tiger@127.0.0.1:1521/sidname' + +.. _`SQLAlchemy: Supported Databases`: + http://www.sqlalchemy.org/docs/core/engines.html#supported-databases + +.. _`SQLAlchemy Connection String`: + http://www.sqlalchemy.org/docs/core/engines.html#database-urls + +.. _sqlalchemy-results-configuration: + +Results +------- + +To store results in the database as well, you should configure the result +backend. See :ref:`conf-database-result-backend`. + +.. _broker-sqlalchemy-limitations: + +Limitations +=========== + +The SQLAlchemy database transport does not currently support: + + * Remote control commands (:program:`celery events` command, broadcast) + * Events, including the Django Admin monitor. + * Using more than a few workers (can lead to messages being executed + multiple times). diff --git a/docs/getting-started/brokers/sqs.rst b/docs/getting-started/brokers/sqs.rst new file mode 100644 index 0000000..9f23314 --- /dev/null +++ b/docs/getting-started/brokers/sqs.rst @@ -0,0 +1,163 @@ +.. _broker-sqs: + +================== + Using Amazon SQS +================== + +.. admonition:: Experimental Status + + The SQS transport is in need of improvements in many areas and there + are several open bugs. Unfortunately we don't have the resources or funds + required to improve the situation, so we're looking for contributors + and partners willing to help. + +.. _broker-sqs-installation: + +Installation +============ + +For the Amazon SQS support you have to install the `boto`_ library: + +.. code-block:: bash + + $ pip install -U boto + +.. _boto: + http://pypi.python.org/pypi/boto + +.. _broker-sqs-configuration: + +Configuration +============= + +You have to specify SQS in the broker URL:: + + BROKER_URL = 'sqs://ABCDEFGHIJKLMNOPQRST:ZYXK7NiynGlTogH8Nj+P9nlE73sq3@' + +where the URL format is:: + + sqs://aws_access_key_id:aws_secret_access_key@ + +you must *remember to include the "@" at the end*. + +The login credentials can also be set using the environment variables +:envvar:`AWS_ACCESS_KEY_ID` and :envvar:`AWS_SECRET_ACCESS_KEY`, +in that case the broker url may only be ``sqs://``. + +.. note:: + + If you specify AWS credentials in the broker URL, then please keep in mind + that the secret access key may contain unsafe characters that needs to be + URL encoded. + +Options +======= + +Region +------ + +The default region is ``us-east-1`` but you can select another region +by configuring the :setting:`BROKER_TRANSPORT_OPTIONS` setting:: + + BROKER_TRANSPORT_OPTIONS = {'region': 'eu-west-1'} + +.. seealso:: + + An overview of Amazon Web Services regions can be found here: + + http://aws.amazon.com/about-aws/globalinfrastructure/ + +Visibility Timeout +------------------ + +The visibility timeout defines the number of seconds to wait +for the worker to acknowledge the task before the message is redelivered +to another worker. Also see caveats below. + +This option is set via the :setting:`BROKER_TRANSPORT_OPTIONS` setting:: + + BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 3600} # 1 hour. + +The default visibility timeout is 30 seconds. + +Polling Interval +---------------- + +The polling interval decides the number of seconds to sleep between +unsuccessful polls. This value can be either an int or a float. 
+By default the value is 1 second, which means that the worker will +sleep for one second whenever there are no more messages to read. + +You should note that **more frequent polling is also more expensive, so increasing +the polling interval can save you money**. + +The polling interval can be set via the :setting:`BROKER_TRANSPORT_OPTIONS` +setting:: + + BROKER_TRANSPORT_OPTIONS = {'polling_interval': 0.3} + +Very frequent polling intervals can cause *busy loops*, which results in the +worker using a lot of CPU time. If you need sub-millisecond precision you +should consider using another transport, like `RabbitMQ `, +or `Redis `. + +Queue Prefix +------------ + +By default Celery will not assign any prefix to the queue names, +If you have other services using SQS you can configure it do so +using the :setting:`BROKER_TRANSPORT_OPTIONS` setting:: + + BROKER_TRANSPORT_OPTIONS = {'queue_name_prefix': 'celery-'} + + +.. _sqs-caveats: + +Caveats +======= + +- If a task is not acknowledged within the ``visibility_timeout``, + the task will be redelivered to another worker and executed. + + This causes problems with ETA/countdown/retry tasks where the + time to execute exceeds the visibility timeout; in fact if that + happens it will be executed again, and again in a loop. + + So you have to increase the visibility timeout to match + the time of the longest ETA you are planning to use. + + Note that Celery will redeliver messages at worker shutdown, + so having a long visibility timeout will only delay the redelivery + of 'lost' tasks in the event of a power failure or forcefully terminated + workers. + + Periodic tasks will not be affected by the visibility timeout, + as it is a concept separate from ETA/countdown. + + The maximum visibility timeout supported by AWS as of this writing + is 12 hours (43200 seconds):: + + BROKER_TRANSPORT_OPTIONS = {'visibility_timeout': 43200} + +- SQS does not yet support worker remote control commands. + +- SQS does not yet support events, and so cannot be used with + :program:`celery events`, :program:`celerymon` or the Django Admin + monitor. + +.. _sqs-results-configuration: + +Results +------- + +Multiple products in the Amazon Web Services family could be a good candidate +to store or publish results with, but there is no such result backend included +at this point. + +.. warning:: + + Do not use the ``amqp`` result backend with SQS. + + It will create one queue for every task, and the queues will + not be collected. This could cost you money that would be better + spent contributing an AWS result store backend back to Celery :) diff --git a/docs/getting-started/first-steps-with-celery.rst b/docs/getting-started/first-steps-with-celery.rst new file mode 100644 index 0000000..86ddf38 --- /dev/null +++ b/docs/getting-started/first-steps-with-celery.rst @@ -0,0 +1,460 @@ +.. _tut-celery: +.. _first-steps: + +========================= + First Steps with Celery +========================= + +Celery is a task queue with batteries included. +It is easy to use so that you can get started without learning +the full complexities of the problem it solves. It is designed +around best practices so that your product can scale +and integrate with other languages, and it comes with the +tools and support you need to run such a system in production. + +In this tutorial you will learn the absolute basics of using Celery. +You will learn about; + +- Choosing and installing a message transport (broker). 
+- Installing Celery and creating your first task +- Starting the worker and calling tasks. +- Keeping track of tasks as they transition through different states, + and inspecting return values. + +Celery may seem daunting at first - but don't worry - this tutorial +will get you started in no time. It is deliberately kept simple, so +to not confuse you with advanced features. +After you have finished this tutorial +it's a good idea to browse the rest of the documentation, +for example the :ref:`next-steps` tutorial, which will +showcase Celery's capabilities. + +.. contents:: + :local: + +.. _celerytut-broker: + +Choosing a Broker +================= + +Celery requires a solution to send and receive messages, usually this +comes in the form of a separate service called a *message broker*. + +There are several choices available, including: + +RabbitMQ +-------- + +`RabbitMQ`_ is feature-complete, stable, durable and easy to install. +It's an excellent choice for a production environment. +Detailed information about using RabbitMQ with Celery: + + :ref:`broker-rabbitmq` + +.. _`RabbitMQ`: http://www.rabbitmq.com/ + +If you are using Ubuntu or Debian install RabbitMQ by executing this +command: + +.. code-block:: bash + + $ sudo apt-get install rabbitmq-server + +When the command completes the broker is already running in the background, +ready to move messages for you: ``Starting rabbitmq-server: SUCCESS``. + +And don't worry if you're not running Ubuntu or Debian, you can go to this +website to find similarly simple installation instructions for other +platforms, including Microsoft Windows: + + http://www.rabbitmq.com/download.html + + +Redis +----- + +`Redis`_ is also feature-complete, but is more susceptible to data loss in +the event of abrupt termination or power failures. Detailed information about using Redis: + + :ref:`broker-redis` + +.. _`Redis`: http://redis.io/ + + +Using a database +---------------- + +Using a database as a message queue is not recommended, but can be sufficient +for very small installations. Your options include: + +* :ref:`broker-sqlalchemy` +* :ref:`broker-django` + +If you're already using a Django database for example, using it as your +message broker can be convenient while developing even if you use a more +robust system in production. + +Other brokers +------------- + +In addition to the above, there are other experimental transport implementations +to choose from, including :ref:`Amazon SQS `, :ref:`broker-mongodb` +and :ref:`IronMQ `. + +See :ref:`broker-overview` for a full list. + +.. _celerytut-installation: + +Installing Celery +================= + +Celery is on the Python Package Index (PyPI), so it can be installed +with standard Python tools like ``pip`` or ``easy_install``: + +.. code-block:: bash + + $ pip install celery + +Application +=========== + +The first thing you need is a Celery instance, this is called the celery +application or just app in short. Since this instance is used as +the entry-point for everything you want to do in Celery, like creating tasks and +managing workers, it must be possible for other modules to import it. + +In this tutorial you will keep everything contained in a single module, +but for larger projects you want to create +a :ref:`dedicated module `. + +Let's create the file :file:`tasks.py`: + +.. 
code-block:: python + + from celery import Celery + + app = Celery('tasks', broker='amqp://guest@localhost//') + + @app.task + def add(x, y): + return x + y + +The first argument to :class:`~celery.app.Celery` is the name of the current module, +this is needed so that names can be automatically generated, the second +argument is the broker keyword argument which specifies the URL of the +message broker you want to use, using RabbitMQ here, which is already the +default option. See :ref:`celerytut-broker` above for more choices, +e.g. for RabbitMQ you can use ``amqp://localhost``, or for Redis you can +use ``redis://localhost``. + +You defined a single task, called ``add``, which returns the sum of two numbers. + +.. _celerytut-running-the-worker: + +Running the celery worker server +================================ + +You now run the worker by executing our program with the ``worker`` +argument: + +.. code-block:: bash + + $ celery -A tasks worker --loglevel=info + +.. note:: + + See the :ref:`celerytut-troubleshooting` section if the worker + does not start. + +In production you will want to run the worker in the +background as a daemon. To do this you need to use the tools provided +by your platform, or something like `supervisord`_ (see :ref:`daemonizing` +for more information). + +For a complete listing of the command-line options available, do: + +.. code-block:: bash + + $ celery worker --help + +There are also several other commands available, and help is also available: + +.. code-block:: bash + + $ celery help + +.. _`supervisord`: http://supervisord.org + +.. _celerytut-calling: + +Calling the task +================ + +To call our task you can use the :meth:`~@Task.delay` method. + +This is a handy shortcut to the :meth:`~@Task.apply_async` +method which gives greater control of the task execution (see +:ref:`guide-calling`):: + + >>> from tasks import add + >>> add.delay(4, 4) + +The task has now been processed by the worker you started earlier, +and you can verify that by looking at the workers console output. + +Calling a task returns an :class:`~@AsyncResult` instance, +which can be used to check the state of the task, wait for the task to finish +or get its return value (or if the task failed, the exception and traceback). +But this isn't enabled by default, and you have to configure Celery to +use a result backend, which is detailed in the next section. + +.. _celerytut-keeping-results: + +Keeping Results +=============== + +If you want to keep track of the tasks' states, Celery needs to store or send +the states somewhere. There are several +built-in result backends to choose from: `SQLAlchemy`_/`Django`_ ORM, +`Memcached`_, `Redis`_, AMQP (`RabbitMQ`_), and `MongoDB`_ -- or you can define your own. + +.. _`Memcached`: http://memcached.org +.. _`MongoDB`: http://www.mongodb.org +.. _`SQLAlchemy`: http://www.sqlalchemy.org/ +.. _`Django`: http://djangoproject.com + +For this example you will use the `amqp` result backend, which sends states +as messages. The backend is specified via the ``backend`` argument to +:class:`@Celery`, (or via the :setting:`CELERY_RESULT_BACKEND` setting if +you choose to use a configuration module):: + + app = Celery('tasks', backend='amqp', broker='amqp://') + +or if you want to use Redis as the result backend, but still use RabbitMQ as +the message broker (a popular combination):: + + app = Celery('tasks', backend='redis://localhost', broker='amqp://') + +To read more about result backends please see :ref:`task-result-backends`. 
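Putting it together, the :file:`tasks.py` module from before could now look
like this (a sketch using the Redis result backend; substitute whichever
backend you chose):

.. code-block:: python

    from celery import Celery

    app = Celery('tasks',
                 backend='redis://localhost',
                 broker='amqp://guest@localhost//')

    @app.task
    def add(x, y):
        return x + y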
+ +Now with the result backend configured, let's call the task again. +This time you'll hold on to the :class:`~@AsyncResult` instance returned +when you call a task:: + + >>> result = add.delay(4, 4) + +The :meth:`~@AsyncResult.ready` method returns whether the task +has finished processing or not:: + + >>> result.ready() + False + +You can wait for the result to complete, but this is rarely used +since it turns the asynchronous call into a synchronous one:: + + >>> result.get(timeout=1) + 8 + +In case the task raised an exception, :meth:`~@AsyncResult.get` will +re-raise the exception, but you can override this by specifying +the ``propagate`` argument:: + + >>> result.get(propagate=False) + + +If the task raised an exception you can also gain access to the +original traceback:: + + >>> result.traceback + … + +See :mod:`celery.result` for the complete result object reference. + +.. _celerytut-configuration: + +Configuration +============= + +Celery, like a consumer appliance doesn't need much to be operated. +It has an input and an output, where you must connect the input to a broker and maybe +the output to a result backend if so wanted. But if you look closely at the back +there's a lid revealing loads of sliders, dials and buttons: this is the configuration. + +The default configuration should be good enough for most uses, but there's +many things to tweak so Celery works just the way you want it to. +Reading about the options available is a good idea to get familiar with what +can be configured. You can read about the options in the +:ref:`configuration` reference. + +The configuration can be set on the app directly or by using a dedicated +configuration module. +As an example you can configure the default serializer used for serializing +task payloads by changing the :setting:`CELERY_TASK_SERIALIZER` setting: + +.. code-block:: python + + app.conf.CELERY_TASK_SERIALIZER = 'json' + +If you are configuring many settings at once you can use ``update``: + +.. code-block:: python + + app.conf.update( + CELERY_TASK_SERIALIZER='json', + CELERY_ACCEPT_CONTENT=['json'], # Ignore other content + CELERY_RESULT_SERIALIZER='json', + CELERY_TIMEZONE='Europe/Oslo', + CELERY_ENABLE_UTC=True, + ) + +For larger projects using a dedicated configuration module is useful, +in fact you are discouraged from hard coding +periodic task intervals and task routing options, as it is much +better to keep this in a centralized location, and especially for libraries +it makes it possible for users to control how they want your tasks to behave, +you can also imagine your SysAdmin making simple changes to the configuration +in the event of system trouble. + +You can tell your Celery instance to use a configuration module, +by calling the :meth:`~@Celery.config_from_object` method: + +.. code-block:: python + + app.config_from_object('celeryconfig') + +This module is often called "``celeryconfig``", but you can use any +module name. + +A module named ``celeryconfig.py`` must then be available to load from the +current directory or on the Python path, it could look like this: + +:file:`celeryconfig.py`: + +.. code-block:: python + + BROKER_URL = 'amqp://' + CELERY_RESULT_BACKEND = 'amqp://' + + CELERY_TASK_SERIALIZER = 'json' + CELERY_RESULT_SERIALIZER = 'json' + CELERY_ACCEPT_CONTENT=['json'] + CELERY_TIMEZONE = 'Europe/Oslo' + CELERY_ENABLE_UTC = True + +To verify that your configuration file works properly, and doesn't +contain any syntax errors, you can try to import it: + +.. 
code-block:: bash + + $ python -m celeryconfig + +For a complete reference of configuration options, see :ref:`configuration`. + +To demonstrate the power of configuration files, this is how you would +route a misbehaving task to a dedicated queue: + +:file:`celeryconfig.py`: + +.. code-block:: python + + CELERY_ROUTES = { + 'tasks.add': 'low-priority', + } + +Or instead of routing it you could rate limit the task +instead, so that only 10 tasks of this type can be processed in a minute +(10/m): + +:file:`celeryconfig.py`: + +.. code-block:: python + + CELERY_ANNOTATIONS = { + 'tasks.add': {'rate_limit': '10/m'} + } + +If you are using RabbitMQ or Redis as the +broker then you can also direct the workers to set a new rate limit +for the task at runtime: + +.. code-block:: bash + + $ celery -A tasks control rate_limit tasks.add 10/m + worker@example.com: OK + new rate limit set successfully + +See :ref:`guide-routing` to read more about task routing, +and the :setting:`CELERY_ANNOTATIONS` setting for more about annotations, +or :ref:`guide-monitoring` for more about remote control commands, +and how to monitor what your workers are doing. + +Where to go from here +===================== + +If you want to learn more you should continue to the +:ref:`Next Steps ` tutorial, and after that you +can study the :ref:`User Guide `. + +.. _celerytut-troubleshooting: + +Troubleshooting +=============== + +There's also a troubleshooting section in the :ref:`faq`. + +Worker does not start: Permission Error +--------------------------------------- + +- If you're using Debian, Ubuntu or other Debian-based distributions: + + Debian recently renamed the ``/dev/shm`` special file to ``/run/shm``. + + A simple workaround is to create a symbolic link: + + .. code-block:: bash + + # ln -s /run/shm /dev/shm + +- Others: + + If you provide any of the :option:`--pidfile`, :option:`--logfile` or + ``--statedb`` arguments, then you must make sure that they + point to a file/directory that is writable and readable by the + user starting the worker. + +Result backend does not work or tasks are always in ``PENDING`` state. +---------------------------------------------------------------------- + +All tasks are ``PENDING`` by default, so the state would have been +better named "unknown". Celery does not update any state when a task +is sent, and any task with no history is assumed to be pending (you know +the task id after all). + +1) Make sure that the task does not have ``ignore_result`` enabled. + + Enabling this option will force the worker to skip updating + states. + +2) Make sure the :setting:`CELERY_IGNORE_RESULT` setting is not enabled. + +3) Make sure that you do not have any old workers still running. + + It's easy to start multiple workers by accident, so make sure + that the previous worker is properly shutdown before you start a new one. + + An old worker that is not configured with the expected result backend + may be running and is hijacking the tasks. + + The `--pidfile` argument can be set to an absolute path to make sure + this doesn't happen. + +4) Make sure the client is configured with the right backend. + + If for some reason the client is configured to use a different backend + than the worker, you will not be able to receive the result, + so make sure the backend is correct by inspecting it: + + .. 
code-block:: python + + >>> result = task.delay(…) + >>> print(result.backend) diff --git a/docs/getting-started/index.rst b/docs/getting-started/index.rst new file mode 100644 index 0000000..b590a18 --- /dev/null +++ b/docs/getting-started/index.rst @@ -0,0 +1,15 @@ +================= + Getting Started +================= + +:Release: |version| +:Date: |today| + +.. toctree:: + :maxdepth: 2 + + introduction + brokers/index + first-steps-with-celery + next-steps + resources diff --git a/docs/getting-started/introduction.rst b/docs/getting-started/introduction.rst new file mode 100644 index 0000000..466938f --- /dev/null +++ b/docs/getting-started/introduction.rst @@ -0,0 +1,316 @@ +.. _intro: + +======================== + Introduction to Celery +======================== + +.. contents:: + :local: + :depth: 1 + +What is a Task Queue? +===================== + +Task queues are used as a mechanism to distribute work across threads or +machines. + +A task queue's input is a unit of work called a task. Dedicated worker +processes constantly monitor task queues for new work to perform. + +Celery communicates via messages, usually using a broker +to mediate between clients and workers. To initiate a task, a client adds a +message to the queue, which the broker then delivers to a worker. + +A Celery system can consist of multiple workers and brokers, giving way +to high availability and horizontal scaling. + +Celery is written in Python, but the protocol can be implemented in any +language. So far there's RCelery_ for the Ruby programming language, +node-celery_ for Node.js and a `PHP client`_. Language interoperability can also be achieved +by :ref:`using webhooks `. + +.. _RCelery: http://leapfrogdevelopment.github.com/rcelery/ +.. _`PHP client`: https://github.com/gjedeer/celery-php +.. _node-celery: https://github.com/mher/node-celery + +What do I need? +=============== + +.. sidebar:: Version Requirements + :subtitle: Celery version 3.0 runs on + + - Python ❨2.5, 2.6, 2.7, 3.2, 3.3❩ + - PyPy ❨1.8, 1.9❩ + - Jython ❨2.5, 2.7❩. + + This is the last version to support Python 2.5, + and from the next version Python 2.6 or newer is required. + The last version to support Python 2.4 was Celery series 2.2. + +*Celery* requires a message transport to send and receive messages. +The RabbitMQ and Redis broker transports are feature complete, +but there's also support for a myriad of other experimental solutions, including +using SQLite for local development. + +*Celery* can run on a single machine, on multiple machines, or even +across data centers. + +Get Started +=========== + +If this is the first time you're trying to use Celery, or you are +new to Celery 3.0 coming from previous versions then you should read our +getting started tutorials: + +- :ref:`first-steps` +- :ref:`next-steps` + +Celery is… +========== + +.. _`mailing-list`: http://groups.google.com/group/celery-users + +.. topic:: \ + + - **Simple** + + Celery is easy to use and maintain, and it *doesn't need configuration files*. + + It has an active, friendly community you can talk to for support, + including a `mailing-list`_ and an :ref:`IRC channel `. + + Here's one of the simplest applications you can make: + + .. 
code-block:: python + + from celery import Celery + + app = Celery('hello', broker='amqp://guest@localhost//') + + @app.task + def hello(): + return 'hello world' + + - **Highly Available** + + Workers and clients will automatically retry in the event + of connection loss or failure, and some brokers support + HA in way of *Master/Master* or *Master/Slave* replication. + + - **Fast** + + A single Celery process can process millions of tasks a minute, + with sub-millisecond round-trip latency (using RabbitMQ, + py-librabbitmq, and optimized settings). + + - **Flexible** + + Almost every part of *Celery* can be extended or used on its own, + Custom pool implementations, serializers, compression schemes, logging, + schedulers, consumers, producers, autoscalers, broker transports and much more. + + +.. topic:: It supports + + .. hlist:: + :columns: 2 + + - **Brokers** + + - :ref:`RabbitMQ `, :ref:`Redis `, + - :ref:`MongoDB ` (exp), ZeroMQ (exp) + - :ref:`CouchDB ` (exp), :ref:`SQLAlchemy ` (exp) + - :ref:`Django ORM ` (exp), :ref:`Amazon SQS `, (exp) + - and more… + + - **Concurrency** + + - prefork (multiprocessing), + - Eventlet_, gevent_ + - threads/single threaded + + - **Result Stores** + + - AMQP, Redis + - memcached, MongoDB + - SQLAlchemy, Django ORM + - Apache Cassandra + + - **Serialization** + + - *pickle*, *json*, *yaml*, *msgpack*. + - *zlib*, *bzip2* compression. + - Cryptographic message signing. + +Features +======== + +.. topic:: \ + + .. hlist:: + :columns: 2 + + - **Monitoring** + + A stream of monitoring events is emitted by workers and + is used by built-in and external tools to tell you what + your cluster is doing -- in real-time. + + :ref:`Read more… `. + + - **Workflows** + + Simple and complex workflows can be composed using + a set of powerful primitives we call the "canvas", + including grouping, chaining, chunking and more. + + :ref:`Read more… `. + + - **Time & Rate Limits** + + You can control how many tasks can be executed per second/minute/hour, + or how long a task can be allowed to run, and this can be set as + a default, for a specific worker or individually for each task type. + + :ref:`Read more… `. + + - **Scheduling** + + You can specify the time to run a task in seconds or a + :class:`~datetime.datetime`, or or you can use + periodic tasks for recurring events based on a + simple interval, or crontab expressions + supporting minute, hour, day of week, day of month, and + month of year. + + :ref:`Read more… `. + + - **Autoreloading** + + In development workers can be configured to automatically reload source + code as it changes, including :manpage:`inotify(7)` support on Linux. + + :ref:`Read more… `. + + - **Autoscaling** + + Dynamically resizing the worker pool depending on load, + or custom metrics specified by the user, used to limit + memory usage in shared hosting/cloud environments or to + enforce a given quality of service. + + :ref:`Read more… `. + + - **Resource Leak Protection** + + The :option:`--maxtasksperchild` option is used for user tasks + leaking resources, like memory or file descriptors, that + are simply out of your control. + + :ref:`Read more… `. + + - **User Components** + + Each worker component can be customized, and additional components + can be defined by the user. The worker is built up using "bootsteps" — a + dependency graph enabling fine grained control of the worker's + internals. + +.. _`Eventlet`: http://eventlet.net/ +.. 
_`gevent`: http://gevent.org/ + +Framework Integration +===================== + +Celery is easy to integrate with web frameworks, some of which even have +integration packages: + + +--------------------+------------------------+ + | `Django`_ | `django-celery`_ | + +--------------------+------------------------+ + | `Pyramid`_ | `pyramid_celery`_ | + +--------------------+------------------------+ + | `Pylons`_ | `celery-pylons`_ | + +--------------------+------------------------+ + | `Flask`_ | not needed | + +--------------------+------------------------+ + | `web2py`_ | `web2py-celery`_ | + +--------------------+------------------------+ + | `Tornado`_ | `tornado-celery`_ | + +--------------------+------------------------+ + +The integration packages are not strictly necessary, but they can make +development easier, and sometimes they add important hooks like closing +database connections at :manpage:`fork(2)`. + +.. _`Django`: http://djangoproject.com/ +.. _`Pylons`: http://pylonshq.com/ +.. _`Flask`: http://flask.pocoo.org/ +.. _`web2py`: http://web2py.com/ +.. _`Bottle`: http://bottlepy.org/ +.. _`Pyramid`: http://docs.pylonsproject.org/en/latest/docs/pyramid.html +.. _`pyramid_celery`: http://pypi.python.org/pypi/pyramid_celery/ +.. _`django-celery`: http://pypi.python.org/pypi/django-celery +.. _`celery-pylons`: http://pypi.python.org/pypi/celery-pylons +.. _`web2py-celery`: http://code.google.com/p/web2py-celery/ +.. _`Tornado`: http://www.tornadoweb.org/ +.. _`tornado-celery`: http://github.com/mher/tornado-celery/ + +Quickjump +========= + +.. topic:: I want to ⟶ + + .. hlist:: + :columns: 2 + + - :ref:`get the return value of a task ` + - :ref:`use logging from my task ` + - :ref:`learn about best practices ` + - :ref:`create a custom task base class ` + - :ref:`add a callback to a group of tasks ` + - :ref:`split a task into several chunks ` + - :ref:`optimize the worker ` + - :ref:`see a list of built-in task states ` + - :ref:`create custom task states ` + - :ref:`set a custom task name ` + - :ref:`track when a task starts ` + - :ref:`retry a task when it fails ` + - :ref:`get the id of the current task ` + - :ref:`know what queue a task was delivered to ` + - :ref:`see a list of running workers ` + - :ref:`purge all messages ` + - :ref:`inspect what the workers are doing ` + - :ref:`see what tasks a worker has registerd ` + - :ref:`migrate tasks to a new broker ` + - :ref:`see a list of event message types ` + - :ref:`contribute to Celery ` + - :ref:`learn about available configuration settings ` + - :ref:`receive email when a task fails ` + - :ref:`get a list of people and companies using Celery ` + - :ref:`write my own remote control command ` + - :ref:`change worker queues at runtime ` + +.. topic:: Jump to ⟶ + + .. hlist:: + :columns: 4 + + - :ref:`Brokers ` + - :ref:`Applications ` + - :ref:`Tasks ` + - :ref:`Calling ` + - :ref:`Workers ` + - :ref:`Daemonizing ` + - :ref:`Monitoring ` + - :ref:`Optimizing ` + - :ref:`Security ` + - :ref:`Routing ` + - :ref:`Configuration ` + - :ref:`Django ` + - :ref:`Contributing ` + - :ref:`Signals ` + - :ref:`FAQ ` + - :ref:`API Reference ` + +.. include:: ../includes/installation.txt diff --git a/docs/getting-started/next-steps.rst b/docs/getting-started/next-steps.rst new file mode 100644 index 0000000..7bd7684 --- /dev/null +++ b/docs/getting-started/next-steps.rst @@ -0,0 +1,724 @@ +.. _next-steps: + +============ + Next Steps +============ + +The :ref:`first-steps` guide is intentionally minimal. 
In this guide +I will demonstrate what Celery offers in more detail, including +how to add Celery support for your application and library. + +This document does not document all of Celery's features and +best practices, so it's recommended that you also read the +:ref:`User Guide ` + +.. contents:: + :local: + :depth: 1 + +Using Celery in your Application +================================ + +.. _project-layout: + +Our Project +----------- + +Project layout:: + + proj/__init__.py + /celery.py + /tasks.py + +:file:`proj/celery.py` +~~~~~~~~~~~~~~~~~~~~~~ + +.. literalinclude:: ../../examples/next-steps/proj/celery.py + :language: python + +In this module you created our :class:`@Celery` instance (sometimes +referred to as the *app*). To use Celery within your project +you simply import this instance. + +- The ``broker`` argument specifies the URL of the broker to use. + + See :ref:`celerytut-broker` for more information. + +- The ``backend`` argument specifies the result backend to use, + + It's used to keep track of task state and results. + While results are disabled by default I use the amqp result backend here + because I demonstrate how retrieving results work later, you may want to use + a different backend for your application. They all have different + strengths and weaknesses. If you don't need results it's better + to disable them. Results can also be disabled for individual tasks + by setting the ``@task(ignore_result=True)`` option. + + See :ref:`celerytut-keeping-results` for more information. + +- The ``include`` argument is a list of modules to import when + the worker starts. You need to add our tasks module here so + that the worker is able to find our tasks. + +:file:`proj/tasks.py` +~~~~~~~~~~~~~~~~~~~~~ + +.. literalinclude:: ../../examples/next-steps/proj/tasks.py + :language: python + + +Starting the worker +------------------- + +The :program:`celery` program can be used to start the worker: + +.. code-block:: bash + + $ celery -A proj worker -l info + +When the worker starts you should see a banner and some messages:: + + -------------- celery@halcyon.local v3.1 (Cipater) + ---- **** ----- + --- * *** * -- [Configuration] + -- * - **** --- . broker: amqp://guest@localhost:5672// + - ** ---------- . app: __main__:0x1012d8590 + - ** ---------- . concurrency: 8 (processes) + - ** ---------- . events: OFF (enable -E to monitor this worker) + - ** ---------- + - *** --- * --- [Queues] + -- ******* ---- . celery: exchange:celery(direct) binding:celery + --- ***** ----- + + [2012-06-08 16:23:51,078: WARNING/MainProcess] celery@halcyon.local has started. + +-- The *broker* is the URL you specifed in the broker argument in our ``celery`` +module, you can also specify a different broker on the command-line by using +the :option:`-b` option. + +-- *Concurrency* is the number of prefork worker process used +to process your tasks concurrently, when all of these are busy doing work +new tasks will have to wait for one of the tasks to finish before +it can be processed. + +The default concurrency number is the number of CPU's on that machine +(including cores), you can specify a custom number using :option:`-c` option. +There is no recommended value, as the optimal number depends on a number of +factors, but if your tasks are mostly I/O-bound then you can try to increase +it, experimentation has shown that adding more than twice the number +of CPU's is rarely effective, and likely to degrade performance +instead. 
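If you would rather fix the pool size in configuration instead of passing
:option:`-c` on the command line, the :setting:`CELERYD_CONCURRENCY` setting
does the same thing (a sketch; the value shown is an arbitrary example):

.. code-block:: python

    # Set via app.conf or a configuration module; overrides the default
    # of one process per CPU.
    CELERYD_CONCURRENCY = 10  # arbitrary example value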
+ +Including the default prefork pool, Celery also supports using +Eventlet, Gevent, and threads (see :ref:`concurrency`). + +-- *Events* is an option that when enabled causes Celery to send +monitoring messages (events) for actions occurring in the worker. +These can be used by monitor programs like ``celery events``, +and Flower - the real-time Celery monitor, which you can read about in +the :ref:`Monitoring and Management guide `. + +-- *Queues* is the list of queues that the worker will consume +tasks from. The worker can be told to consume from several queues +at once, and this is used to route messages to specific workers +as a means for Quality of Service, separation of concerns, +and emulating priorities, all described in the :ref:`Routing Guide +`. + +You can get a complete list of command-line arguments +by passing in the `--help` flag: + +.. code-block:: bash + + $ celery worker --help + +These options are described in more detailed in the :ref:`Workers Guide `. + +Stopping the worker +~~~~~~~~~~~~~~~~~~~ + +To stop the worker simply hit Ctrl+C. A list of signals supported +by the worker is detailed in the :ref:`Workers Guide `. + +In the background +~~~~~~~~~~~~~~~~~ + +In production you will want to run the worker in the background, this is +described in detail in the :ref:`daemonization tutorial `. + +The daemonization scripts uses the :program:`celery multi` command to +start one or more workers in the background: + +.. code-block:: bash + + $ celery multi start w1 -A proj -l info + celery multi v3.1.1 (Cipater) + > Starting nodes... + > w1.halcyon.local: OK + +You can restart it too: + +.. code-block:: bash + + $ celery multi restart w1 -A proj -l info + celery multi v3.1.1 (Cipater) + > Stopping nodes... + > w1.halcyon.local: TERM -> 64024 + > Waiting for 1 node..... + > w1.halcyon.local: OK + > Restarting node w1.halcyon.local: OK + celery multi v3.1.1 (Cipater) + > Stopping nodes... + > w1.halcyon.local: TERM -> 64052 + +or stop it: + +.. code-block:: bash + + $ celery multi stop w1 -A proj -l info + +The ``stop`` command is asynchronous so it will not wait for the +worker to shutdown. You will probably want to use the ``stopwait`` command +instead which will ensure all currently executing tasks is completed: + +.. code-block:: bash + + $ celery multi stopwait w1 -A proj -l info + +.. note:: + + :program:`celery multi` doesn't store information about workers + so you need to use the same command-line arguments when + restarting. Only the same pidfile and logfile arguments must be + used when stopping. + +By default it will create pid and log files in the current directory, +to protect against multiple workers launching on top of each other +you are encouraged to put these in a dedicated directory: + +.. code-block:: bash + + $ mkdir -p /var/run/celery + $ mkdir -p /var/log/celery + $ celery multi start w1 -A proj -l info --pidfile=/var/run/celery/%n.pid \ + --logfile=/var/log/celery/%n%I.log + +With the multi command you can start multiple workers, and there is a powerful +command-line syntax to specify arguments for different workers too, +e.g: + +.. code-block:: bash + + $ celery multi start 10 -A proj -l info -Q:1-3 images,video -Q:4,5 data \ + -Q default -L:4,5 debug + +For more examples see the :mod:`~celery.bin.multi` module in the API +reference. + +.. 
.. _app-argument:

About the :option:`--app` argument
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The :option:`--app` argument specifies the Celery app instance to use;
it must be in the form of ``module.path:attribute``.

But it also supports a shortcut form: if only a package name is specified,
it'll try to search for the app instance in the following order:

With ``--app=proj``:

1) an attribute named ``proj.app``, or
2) an attribute named ``proj.celery``, or
3) any attribute in the module ``proj`` where the value is a Celery
   application.

If none of these are found it'll try a submodule named ``proj.celery``:

4) an attribute named ``proj.celery.app``, or
5) an attribute named ``proj.celery.celery``, or
6) any attribute in the module ``proj.celery`` where the value is a Celery
   application.

This scheme mimics the practices used in the documentation,
i.e. ``proj:app`` for a single contained module, and ``proj.celery:app``
for larger projects.


.. _calling-tasks:

Calling Tasks
=============

You can call a task using the :meth:`delay` method::

    >>> add.delay(2, 2)

This method is actually a star-argument shortcut to another method called
:meth:`apply_async`::

    >>> add.apply_async((2, 2))

The latter enables you to specify execution options like the time to run
(countdown), the queue it should be sent to, and so on::

    >>> add.apply_async((2, 2), queue='lopri', countdown=10)

In the above example the task will be sent to a queue named ``lopri`` and the
task will execute, at the earliest, 10 seconds after the message was sent.

Applying the task directly will execute the task in the current process,
so that no message is sent::

    >>> add(2, 2)
    4

These three methods - :meth:`delay`, :meth:`apply_async`, and applying
(``__call__``) - represent the Celery calling API, which is also used for
subtasks.

A more detailed overview of the Calling API can be found in the
:ref:`Calling User Guide `.

Every task invocation will be given a unique identifier (a UUID); this
is the task id.

The ``delay`` and ``apply_async`` methods return an :class:`~@AsyncResult`
instance, which can be used to keep track of the task's execution state.
But for this you need to enable a :ref:`result backend ` so that
the state can be stored somewhere.

Results are disabled by default because there is no result
backend that suits every application, so to choose one you need to consider
the drawbacks of each individual backend. For many tasks
keeping the return value isn't even very useful, so disabling them is a
sensible default. Also note that result backends are not used for monitoring
tasks and workers; for that Celery uses dedicated event messages (see
:ref:`guide-monitoring`).
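As a small illustration of the per-task option mentioned earlier, you can keep
a result backend configured for most tasks while ignoring results for a
specific one (a minimal sketch assuming the ``app`` instance from
:file:`proj/celery.py`; the ``log_event`` task is a hypothetical example, not
part of the example project):

.. code-block:: python

    from proj.celery import app

    @app.task(ignore_result=True)
    def log_event(message):
        # No result is stored for this task, even when a result backend is configured.
        print(message)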
+ +If you have a result backend configured you can retrieve the return +value of a task:: + + >>> res = add.delay(2, 2) + >>> res.get(timeout=1) + 4 + +You can find the task's id by looking at the :attr:`id` attribute:: + + >>> res.id + d6b3aea2-fb9b-4ebc-8da4-848818db9114 + +You can also inspect the exception and traceback if the task raised an +exception, in fact ``result.get()`` will propagate any errors by default:: + + >>> res = add.delay(2) + >>> res.get(timeout=1) + Traceback (most recent call last): + File "", line 1, in + File "/opt/devel/celery/celery/result.py", line 113, in get + interval=interval) + File "/opt/devel/celery/celery/backends/amqp.py", line 138, in wait_for + raise self.exception_to_python(meta['result']) + TypeError: add() takes exactly 2 arguments (1 given) + +If you don't wish for the errors to propagate then you can disable that +by passing the ``propagate`` argument:: + + >>> res.get(propagate=False) + TypeError('add() takes exactly 2 arguments (1 given)',) + +In this case it will return the exception instance raised instead, +and so to check whether the task succeeded or failed you will have to +use the corresponding methods on the result instance:: + + >>> res.failed() + True + + >>> res.successful() + False + +So how does it know if the task has failed or not? It can find out by looking +at the tasks *state*:: + + >>> res.state + 'FAILURE' + +A task can only be in a single state, but it can progress through several +states. The stages of a typical task can be:: + + PENDING -> STARTED -> SUCCESS + +The started state is a special state that is only recorded if the +:setting:`CELERY_TRACK_STARTED` setting is enabled, or if the +``@task(track_started=True)`` option is set for the task. + +The pending state is actually not a recorded state, but rather +the default state for any task id that is unknown, which you can see +from this example:: + + >>> from proj.celery import app + + >>> res = app.AsyncResult('this-id-does-not-exist') + >>> res.state + 'PENDING' + +If the task is retried the stages can become even more complex, +e.g, for a task that is retried two times the stages would be:: + + PENDING -> STARTED -> RETRY -> STARTED -> RETRY -> STARTED -> SUCCESS + +To read more about task states you should see the :ref:`task-states` section +in the tasks user guide. + +Calling tasks is described in detail in the +:ref:`Calling Guide `. + +.. _designing-workflows: + +*Canvas*: Designing Workflows +============================= + +You just learned how to call a task using the tasks ``delay`` method, +and this is often all you need, but sometimes you may want to pass the +signature of a task invocation to another process or as an argument to another +function, for this Celery uses something called *subtasks*. + +A subtask wraps the arguments and execution options of a single task +invocation in a way such that it can be passed to functions or even serialized +and sent across the wire. + +You can create a subtask for the ``add`` task using the arguments ``(2, 2)``, +and a countdown of 10 seconds like this:: + + >>> add.subtask((2, 2), countdown=10) + tasks.add(2, 2) + +There is also a shortcut using star arguments:: + + >>> add.s(2, 2) + tasks.add(2, 2) + +And there's that calling API again… +----------------------------------- + +Subtask instances also supports the calling API, which means that they +have the ``delay`` and ``apply_async`` methods. + +But there is a difference in that the subtask may already have +an argument signature specified. 
The ``add`` task takes two arguments, +so a subtask specifying two arguments would make a complete signature:: + + >>> s1 = add.s(2, 2) + >>> res = s1.delay() + >>> res.get() + 4 + +But, you can also make incomplete signatures to create what we call +*partials*:: + + # incomplete partial: add(?, 2) + >>> s2 = add.s(2) + +``s2`` is now a partial subtask that needs another argument to be complete, +and this can be resolved when calling the subtask:: + + # resolves the partial: add(8, 2) + >>> res = s2.delay(8) + >>> res.get() + 10 + +Here you added the argument 8, which was prepended to the existing argument 2 +forming a complete signature of ``add(8, 2)``. + +Keyword arguments can also be added later, these are then merged with any +existing keyword arguments, but with new arguments taking precedence:: + + >>> s3 = add.s(2, 2, debug=True) + >>> s3.delay(debug=False) # debug is now False. + +As stated subtasks supports the calling API, which means that: + +- ``subtask.apply_async(args=(), kwargs={}, **options)`` + + Calls the subtask with optional partial arguments and partial + keyword arguments. Also supports partial execution options. + +- ``subtask.delay(*args, **kwargs)`` + + Star argument version of ``apply_async``. Any arguments will be prepended + to the arguments in the signature, and keyword arguments is merged with any + existing keys. + +So this all seems very useful, but what can you actually do with these? +To get to that I must introduce the canvas primitives… + +The Primitives +-------------- + +.. topic:: \ + + .. hlist:: + :columns: 2 + + - :ref:`group ` + - :ref:`chain ` + - :ref:`chord ` + - :ref:`map ` + - :ref:`starmap ` + - :ref:`chunks ` + +The primitives are subtasks themselves, so that they can be combined +in any number of ways to compose complex workflows. + +.. note:: + + These examples retrieve results, so to try them out you need + to configure a result backend. The example project + above already does that (see the backend argument to :class:`~celery.Celery`). + +Let's look at some examples: + +Groups +~~~~~~ + +A :class:`~celery.group` calls a list of tasks in parallel, +and it returns a special result instance that lets you inspect the results +as a group, and retrieve the return values in order. + +.. code-block:: python + + >>> from celery import group + >>> from proj.tasks import add + + >>> group(add.s(i, i) for i in xrange(10))().get() + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] + +- Partial group + +.. code-block:: python + + >>> g = group(add.s(i) for i in xrange(10)) + >>> g(10).get() + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + +Chains +~~~~~~ + +Tasks can be linked together so that after one task returns the other +is called: + +.. code-block:: python + + >>> from celery import chain + >>> from proj.tasks import add, mul + + # (4 + 4) * 8 + >>> chain(add.s(4, 4) | mul.s(8))().get() + 64 + + +or a partial chain: + +.. code-block:: python + + # (? + 4) * 8 + >>> g = chain(add.s(4) | mul.s(8)) + >>> g(4).get() + 64 + + +Chains can also be written like this: + +.. code-block:: python + + >>> (add.s(4, 4) | mul.s(8))().get() + 64 + +Chords +~~~~~~ + +A chord is a group with a callback: + +.. code-block:: python + + >>> from celery import chord + >>> from proj.tasks import add, xsum + + >>> chord((add.s(i, i) for i in xrange(10)), xsum.s())().get() + 90 + + +A group chained to another task will be automatically converted +to a chord: + +.. 
code-block:: python + + >>> (group(add.s(i, i) for i in xrange(10)) | xsum.s())().get() + 90 + + +Since these primitives are all of the subtask type they +can be combined almost however you want, e.g:: + + >>> upload_document.s(file) | group(apply_filter.s() for filter in filters) + +Be sure to read more about workflows in the :ref:`Canvas ` user +guide. + +Routing +======= + +Celery supports all of the routing facilities provided by AMQP, +but it also supports simple routing where messages are sent to named queues. + +The :setting:`CELERY_ROUTES` setting enables you to route tasks by name +and keep everything centralized in one location:: + + app.conf.update( + CELERY_ROUTES = { + 'proj.tasks.add': {'queue': 'hipri'}, + }, + ) + +You can also specify the queue at runtime +with the ``queue`` argument to ``apply_async``:: + + >>> from proj.tasks import add + >>> add.apply_async((2, 2), queue='hipri') + +You can then make a worker consume from this queue by +specifying the :option:`-Q` option: + +.. code-block:: bash + + $ celery -A proj worker -Q hipri + +You may specify multiple queues by using a comma separated list, +for example you can make the worker consume from both the default +queue, and the ``hipri`` queue, where +the default queue is named ``celery`` for historical reasons: + +.. code-block:: bash + + $ celery -A proj worker -Q hipri,celery + +The order of the queues doesn't matter as the worker will +give equal weight to the queues. + +To learn more about routing, including taking use of the full +power of AMQP routing, see the :ref:`Routing Guide `. + +Remote Control +============== + +If you're using RabbitMQ (AMQP), Redis or MongoDB as the broker then +you can control and inspect the worker at runtime. + +For example you can see what tasks the worker is currently working on: + +.. code-block:: bash + + $ celery -A proj inspect active + +This is implemented by using broadcast messaging, so all remote +control commands are received by every worker in the cluster. + +You can also specify one or more workers to act on the request +using the :option:`--destination` option, which is a comma separated +list of worker host names: + +.. code-block:: bash + + $ celery -A proj inspect active --destination=celery@example.com + +If a destination is not provided then every worker will act and reply +to the request. + +The :program:`celery inspect` command contains commands that +does not change anything in the worker, it only replies information +and statistics about what is going on inside the worker. +For a list of inspect commands you can execute: + +.. code-block:: bash + + $ celery -A proj inspect --help + +Then there is the :program:`celery control` command, which contains +commands that actually changes things in the worker at runtime: + +.. code-block:: bash + + $ celery -A proj control --help + +For example you can force workers to enable event messages (used +for monitoring tasks and workers): + +.. code-block:: bash + + $ celery -A proj control enable_events + +When events are enabled you can then start the event dumper +to see what the workers are doing: + +.. code-block:: bash + + $ celery -A proj events --dump + +or you can start the curses interface: + +.. code-block:: bash + + $ celery -A proj events + +when you're finished monitoring you can disable events again: + +.. code-block:: bash + + $ celery -A proj control disable_events + +The :program:`celery status` command also uses remote control commands +and shows a list of online workers in the cluster: + +.. 
code-block:: bash + + $ celery -A proj status + +You can read more about the :program:`celery` command and monitoring +in the :ref:`Monitoring Guide `. + +Timezone +======== + +All times and dates, internally and in messages uses the UTC timezone. + +When the worker receives a message, for example with a countdown set it +converts that UTC time to local time. If you wish to use +a different timezone than the system timezone then you must +configure that using the :setting:`CELERY_TIMEZONE` setting:: + + app.conf.CELERY_TIMEZONE = 'Europe/London' + +Optimization +============ + +The default configuration is not optimized for throughput by default, +it tries to walk the middle way between many short tasks and fewer long +tasks, a compromise between throughput and fair scheduling. + +If you have strict fair scheduling requirements, or want to optimize +for throughput then you should read the :ref:`Optimizing Guide +`. + +If you're using RabbitMQ then you should install the :mod:`librabbitmq` +module, which is an AMQP client implemented in C: + +.. code-block:: bash + + $ pip install librabbitmq + +What to do now? +=============== + +Now that you have read this document you should continue +to the :ref:`User Guide `. + +There's also an :ref:`API reference ` if you are so inclined. diff --git a/docs/getting-started/resources.rst b/docs/getting-started/resources.rst new file mode 100644 index 0000000..c4a95bd --- /dev/null +++ b/docs/getting-started/resources.rst @@ -0,0 +1,11 @@ +.. _resources: + +=========== + Resources +=========== + +.. contents:: + :local: + :depth: 2 + +.. include:: ../includes/resources.txt diff --git a/docs/glossary.rst b/docs/glossary.rst new file mode 100644 index 0000000..ecc4561 --- /dev/null +++ b/docs/glossary.rst @@ -0,0 +1,83 @@ +.. _glossary: + +Glossary +======== + +.. glossary:: + :sorted: + + acknowledged + Workers acknowledge messages to signify that a message has been + handled. Failing to acknowledge a message + will cause the message to be redelivered. Exactly when a + transaction is considered a failure varies by transport. In AMQP the + transaction fails when the connection/channel is closed (or lost), + but in Redis/SQS the transaction times out after a configurable amount + of time (the ``visibility_timeout``). + + ack + Short for :term:`acknowledged`. + + request + Task messages are converted to *requests* within the worker. + The request information is also available as the task's + :term:`context` (the ``task.request`` attribute). + + calling + Sends a task message so that the task function is + :term:`executed ` by a worker. + + kombu + Python messaging library used by Celery to send and receive messages. + + billiard + Fork of the Python multiprocessing library containing improvements + required by Celery. + + executing + Workers *execute* task :term:`requests `. + + apply + Originally a synonym to :term:`call ` but used to signify + that a function is executed by the current process. + + context + The context of a task contains information like the id of the task, + it's arguments and what queue it was delivered to. + It can be accessed as the tasks ``request`` attribute. + See :ref:`task-request-info` + + idempotent + Idempotence is a mathematical property that describes a function that + can be called multiple times without changing the result. + Practically it means that a function can be repeated many times without + unintented effects, but not necessarily side-effect free in the pure + sense (compare to :term:`nullipotent`). 
+ + nullipotent + describes a function that will have the same effect, and give the same + result, even if called zero or multiple times (side-effect free). + A stronger version of :term:`idempotent`. + + reentrant + describes a function that can be interrupted in the middle of + execution (e.g. by hardware interrupt or signal) and then safely + called again later. Reentrancy is not the same as + :term:`idempotence ` as the return value does not have to + be the same given the same inputs, and a reentrant function may have + side effects as long as it can be interrupted; An idempotent function + is always reentrant, but the reverse may not be true. + + cipater + Celery release 3.1 named after song by Autechre + (http://www.youtube.com/watch?v=OHsaqUr_33Y) + + prefetch multiplier + The :term:`prefetch count` is configured by using the + :setting:`CELERYD_PREFETCH_MULTIPLIER` setting, which is multiplied + by the number of pool slots (threads/processes/greenthreads). + + prefetch count + Maximum number of unacknowledged messages a consumer can hold and if + exceeded the transport should not deliver any more messages to that + consumer. See :ref:`optimizing-prefetch-limit`. diff --git a/docs/history/changelog-1.0.rst b/docs/history/changelog-1.0.rst new file mode 100644 index 0000000..f10ff94 --- /dev/null +++ b/docs/history/changelog-1.0.rst @@ -0,0 +1,1853 @@ +.. _changelog-1.0: + +=============================== + Change history for Celery 1.0 +=============================== + +.. contents:: + :local: + +.. _version-1.0.6: + +1.0.6 +===== +:release-date: 2010-06-30 09:57 A.M CEST +:release-by: Ask Solem + +* RabbitMQ 1.8.0 has extended their exchange equivalence tests to + include `auto_delete` and `durable`. This broke the AMQP backend. + + If you've already used the AMQP backend this means you have to + delete the previous definitions: + + .. code-block:: bash + + $ camqadm exchange.delete celeryresults + + or: + + .. code-block:: bash + + $ python manage.py camqadm exchange.delete celeryresults + +.. _version-1.0.5: + +1.0.5 +===== +:release-date: 2010-06-01 02:36 P.M CEST +:release-by: Ask Solem + +.. _v105-critical: + +Critical +-------- + +* SIGINT/Ctrl+C killed the pool, abruptly terminating the currently executing + tasks. + + Fixed by making the pool worker processes ignore :const:`SIGINT`. + +* Should not close the consumers before the pool is terminated, just cancel + the consumers. + + See issue #122. + +* Now depends on :mod:`billiard` >= 0.3.1 + +* worker: Previously exceptions raised by worker components could stall startup, + now it correctly logs the exceptions and shuts down. + +* worker: Prefetch counts was set too late. QoS is now set as early as possible, + so the worker: can't slurp in all the messages at start-up. + +.. _v105-changes: + +Changes +------- + +* :mod:`celery.contrib.abortable`: Abortable tasks. + + Tasks that defines steps of execution, the task can then + be aborted after each step has completed. + +* :class:`~celery.events.EventDispatcher`: No longer creates AMQP channel + if events are disabled + +* Added required RPM package names under `[bdist_rpm]` section, to support building RPMs + from the sources using setup.py + +* Running unit tests: :envvar:`NOSE_VERBOSE` environment var now enables verbose output from Nose. + +* :func:`celery.execute.apply`: Pass log file/log level arguments as task kwargs. + + See issue #110. + +* celery.execute.apply: Should return exception, not :class:`~celery.datastructures.ExceptionInfo` + on error. 
+ + See issue #111. + +* Added new entries to the :ref:`FAQs `: + + * Should I use retry or acks_late? + * Can I call a task by name? + +.. _version-1.0.4: + +1.0.4 +===== +:release-date: 2010-05-31 09:54 A.M CEST +:release-by: Ask Solem + +* Changelog merged with 1.0.5 as the release was never announced. + +.. _version-1.0.3: + +1.0.3 +===== +:release-date: 2010-05-15 03:00 P.M CEST +:release-by: Ask Solem + +.. _v103-important: + +Important notes +--------------- + +* Messages are now acknowledged *just before* the task function is executed. + + This is the behavior we've wanted all along, but couldn't have because of + limitations in the multiprocessing module. + The previous behavior was not good, and the situation worsened with the + release of 1.0.1, so this change will definitely improve + reliability, performance and operations in general. + + For more information please see http://bit.ly/9hom6T + +* Database result backend: result now explicitly sets `null=True` as + `django-picklefield` version 0.1.5 changed the default behavior + right under our noses :( + + See: http://bit.ly/d5OwMr + + This means those who created their celery tables (via syncdb or + celeryinit) with picklefield versions >= 0.1.5 has to alter their tables to + allow the result field to be `NULL` manually. + + MySQL:: + + ALTER TABLE celery_taskmeta MODIFY result TEXT NULL + + PostgreSQL:: + + ALTER TABLE celery_taskmeta ALTER COLUMN result DROP NOT NULL + +* Removed `Task.rate_limit_queue_type`, as it was not really useful + and made it harder to refactor some parts. + +* Now depends on carrot >= 0.10.4 + +* Now depends on billiard >= 0.3.0 + +.. _v103-news: + +News +---- + +* AMQP backend: Added timeout support for `result.get()` / + `result.wait()`. + +* New task option: `Task.acks_late` (default: :setting:`CELERY_ACKS_LATE`) + + Late ack means the task messages will be acknowledged **after** the task + has been executed, not *just before*, which is the default behavior. + + .. note:: + + This means the tasks may be executed twice if the worker + crashes in mid-execution. Not acceptable for most + applications, but desirable for others. + +* Added crontab-like scheduling to periodic tasks. + + Like a cron job, you can specify units of time of when + you would like the task to execute. While not a full implementation + of cron's features, it should provide a fair degree of common scheduling + needs. + + You can specify a minute (0-59), an hour (0-23), and/or a day of the + week (0-6 where 0 is Sunday, or by names: sun, mon, tue, wed, thu, fri, + sat). + + Examples: + + .. code-block:: python + + from celery.schedules import crontab + from celery.decorators import periodic_task + + @periodic_task(run_every=crontab(hour=7, minute=30)) + def every_morning(): + print("Runs every morning at 7:30a.m") + + @periodic_task(run_every=crontab(hour=7, minute=30, day_of_week="mon")) + def every_monday_morning(): + print("Run every monday morning at 7:30a.m") + + @periodic_task(run_every=crontab(minutes=30)) + def every_hour(): + print("Runs every hour on the clock. e.g. 1:30, 2:30, 3:30 etc.") + + .. note:: + This a late addition. While we have unittests, due to the + nature of this feature we haven't been able to completely test this + in practice, so consider this experimental. + +* `TaskPool.apply_async`: Now supports the `accept_callback` argument. + +* `apply_async`: Now raises :exc:`ValueError` if task args is not a list, + or kwargs is not a tuple (Issue #95). 
+ +* `Task.max_retries` can now be `None`, which means it will retry forever. + +* Celerybeat: Now reuses the same connection when publishing large + sets of tasks. + +* Modified the task locking example in the documentation to use + `cache.add` for atomic locking. + +* Added experimental support for a *started* status on tasks. + + If `Task.track_started` is enabled the task will report its status + as "started" when the task is executed by a worker. + + The default value is `False` as the normal behaviour is to not + report that level of granularity. Tasks are either pending, finished, + or waiting to be retried. Having a "started" status can be useful for + when there are long running tasks and there is a need to report which + task is currently running. + + The global default can be overridden by the :setting:`CELERY_TRACK_STARTED` + setting. + +* User Guide: New section `Tips and Best Practices`. + + Contributions welcome! + +.. _v103-remote-control: + +Remote control commands +----------------------- + +* Remote control commands can now send replies back to the caller. + + Existing commands has been improved to send replies, and the client + interface in `celery.task.control` has new keyword arguments: `reply`, + `timeout` and `limit`. Where reply means it will wait for replies, + timeout is the time in seconds to stop waiting for replies, and limit + is the maximum number of replies to get. + + By default, it will wait for as many replies as possible for one second. + + * rate_limit(task_name, destination=all, reply=False, timeout=1, limit=0) + + Worker returns `{"ok": message}` on success, + or `{"failure": message}` on failure. + + >>> from celery.task.control import rate_limit + >>> rate_limit("tasks.add", "10/s", reply=True) + [{'worker1': {'ok': 'new rate limit set successfully'}}, + {'worker2': {'ok': 'new rate limit set successfully'}}] + + * ping(destination=all, reply=False, timeout=1, limit=0) + + Worker returns the simple message `"pong"`. + + >>> from celery.task.control import ping + >>> ping(reply=True) + [{'worker1': 'pong'}, + {'worker2': 'pong'}, + + * revoke(destination=all, reply=False, timeout=1, limit=0) + + Worker simply returns `True`. + + >>> from celery.task.control import revoke + >>> revoke("419e46eb-cf6a-4271-86a8-442b7124132c", reply=True) + [{'worker1': True}, + {'worker2'; True}] + +* You can now add your own remote control commands! + + Remote control commands are functions registered in the command + registry. Registering a command is done using + :meth:`celery.worker.control.Panel.register`: + + .. 
code-block:: python + + from celery.task.control import Panel + + @Panel.register + def reset_broker_connection(state, **kwargs): + state.consumer.reset_connection() + return {"ok": "connection re-established"} + + With this module imported in the worker, you can launch the command + using `celery.task.control.broadcast`:: + + >>> from celery.task.control import broadcast + >>> broadcast("reset_broker_connection", reply=True) + [{'worker1': {'ok': 'connection re-established'}, + {'worker2': {'ok': 'connection re-established'}}] + + **TIP** You can choose the worker(s) to receive the command + by using the `destination` argument:: + + >>> broadcast("reset_broker_connection", destination=["worker1"]) + [{'worker1': {'ok': 'connection re-established'}] + +* New remote control command: `dump_reserved` + + Dumps tasks reserved by the worker, waiting to be executed:: + + >>> from celery.task.control import broadcast + >>> broadcast("dump_reserved", reply=True) + [{'myworker1': []}] + +* New remote control command: `dump_schedule` + + Dumps the workers currently registered ETA schedule. + These are tasks with an `eta` (or `countdown`) argument + waiting to be executed by the worker. + + >>> from celery.task.control import broadcast + >>> broadcast("dump_schedule", reply=True) + [{'w1': []}, + {'w3': []}, + {'w2': ['0. 2010-05-12 11:06:00 pri0 ,)", + kwargs:"{'page': 2}"}>']}, + {'w4': ['0. 2010-05-12 11:00:00 pri0 ,)", + kwargs:"{\'page\': 1}"}>', + '1. 2010-05-12 11:12:00 pri0 ,)", + kwargs:"{\'page\': 3}"}>']}] + +.. _v103-fixes: + +Fixes +----- + +* Mediator thread no longer blocks for more than 1 second. + + With rate limits enabled and when there was a lot of remaining time, + the mediator thread could block shutdown (and potentially block other + jobs from coming in). + +* Remote rate limits was not properly applied (Issue #98). + +* Now handles exceptions with Unicode messages correctly in + `TaskRequest.on_failure`. + +* Database backend: `TaskMeta.result`: default value should be `None` + not empty string. + +.. _version-1.0.2: + +1.0.2 +===== +:release-date: 2010-03-31 12:50 P.M CET +:release-by: Ask Solem + +* Deprecated: :setting:`CELERY_BACKEND`, please use + :setting:`CELERY_RESULT_BACKEND` instead. + +* We now use a custom logger in tasks. This logger supports task magic + keyword arguments in formats. + + The default format for tasks (:setting:`CELERYD_TASK_LOG_FORMAT`) now + includes the id and the name of tasks so the origin of task log messages + can easily be traced. + + Example output:: + [2010-03-25 13:11:20,317: INFO/PoolWorker-1] + [tasks.add(a6e1c5ad-60d9-42a0-8b24-9e39363125a4)] Hello from add + + To revert to the previous behavior you can set:: + + CELERYD_TASK_LOG_FORMAT = """ + [%(asctime)s: %(levelname)s/%(processName)s] %(message)s + """.strip() + +* Unit tests: Don't disable the django test database tear down, + instead fixed the underlying issue which was caused by modifications + to the `DATABASE_NAME` setting (Issue #82). + +* Django Loader: New config :setting:`CELERY_DB_REUSE_MAX` (max number of + tasks to reuse the same database connection) + + The default is to use a new connection for every task. + We would very much like to reuse the connection, but a safe number of + reuses is not known, and we don't have any way to handle the errors + that might happen, which may even be database dependent. 
+ + See: http://bit.ly/94fwdd + +* worker: The worker components are now configurable: :setting:`CELERYD_POOL`, + :setting:`CELERYD_CONSUMER`, :setting:`CELERYD_MEDIATOR`, and + :setting:`CELERYD_ETA_SCHEDULER`. + + The default configuration is as follows: + + .. code-block:: python + + CELERYD_POOL = "celery.concurrency.processes.TaskPool" + CELERYD_MEDIATOR = "celery.worker.controllers.Mediator" + CELERYD_ETA_SCHEDULER = "celery.worker.controllers.ScheduleController" + CELERYD_CONSUMER = "celery.worker.consumer.Consumer" + + The :setting:`CELERYD_POOL` setting makes it easy to swap out the + multiprocessing pool with a threaded pool, or how about a + twisted/eventlet pool? + + Consider the competition for the first pool plug-in started! + + +* Debian init scripts: Use `-a` not `&&` (Issue #82). + +* Debian init scripts: Now always preserves `$CELERYD_OPTS` from the + `/etc/default/celeryd` and `/etc/default/celerybeat`. + +* celery.beat.Scheduler: Fixed a bug where the schedule was not properly + flushed to disk if the schedule had not been properly initialized. + +* celerybeat: Now syncs the schedule to disk when receiving the :sig:`SIGTERM` + and :sig:`SIGINT` signals. + +* Control commands: Make sure keywords arguments are not in Unicode. + +* ETA scheduler: Was missing a logger object, so the scheduler crashed + when trying to log that a task had been revoked. + +* management.commands.camqadm: Fixed typo `camqpadm` -> `camqadm` + (Issue #83). + +* PeriodicTask.delta_resolution: Was not working for days and hours, now fixed + by rounding to the nearest day/hour. + +* Fixed a potential infinite loop in `BaseAsyncResult.__eq__`, although + there is no evidence that it has ever been triggered. + +* worker: Now handles messages with encoding problems by acking them and + emitting an error message. + +.. _version-1.0.1: + +1.0.1 +===== +:release-date: 2010-02-24 07:05 P.M CET +:release-by: Ask Solem + +* Tasks are now acknowledged early instead of late. + + This is done because messages can only be acknowledged within the same + connection channel, so if the connection is lost we would have to refetch + the message again to acknowledge it. + + This might or might not affect you, but mostly those running tasks with a + really long execution time are affected, as all tasks that has made it + all the way into the pool needs to be executed before the worker can + safely terminate (this is at most the number of pool workers, multiplied + by the :setting:`CELERYD_PREFETCH_MULTIPLIER` setting.) + + We multiply the prefetch count by default to increase the performance at + times with bursts of tasks with a short execution time. If this doesn't + apply to your use case, you should be able to set the prefetch multiplier + to zero, without sacrificing performance. + + .. note:: + + A patch to :mod:`multiprocessing` is currently being + worked on, this patch would enable us to use a better solution, and is + scheduled for inclusion in the `2.0.0` release. + +* The worker now shutdowns cleanly when receiving the :sig:`SIGTERM` signal. + +* The worker now does a cold shutdown if the :sig:`SIGINT` signal + is received (Ctrl+C), + this means it tries to terminate as soon as possible. + +* Caching of results now moved to the base backend classes, so no need + to implement this functionality in the base classes. + +* Caches are now also limited in size, so their memory usage doesn't grow + out of control. 
+ + You can set the maximum number of results the cache + can hold using the :setting:`CELERY_MAX_CACHED_RESULTS` setting (the + default is five thousand results). In addition, you can refetch already + retrieved results using `backend.reload_task_result` + + `backend.reload_taskset_result` (that's for those who want to send + results incrementally). + +* The worker now works on Windows again. + + .. warning:: + + If you're using Celery with Django, you can't use `project.settings` + as the settings module name, but the following should work: + + .. code-block:: bash + + $ python manage.py celeryd --settings=settings + +* Execution: `.messaging.TaskPublisher.send_task` now + incorporates all the functionality apply_async previously did. + + Like converting countdowns to eta, so :func:`celery.execute.apply_async` is + now simply a convenient front-end to + :meth:`celery.messaging.TaskPublisher.send_task`, using + the task classes default options. + + Also :func:`celery.execute.send_task` has been + introduced, which can apply tasks using just the task name (useful + if the client does not have the destination task in its task registry). + + Example: + + >>> from celery.execute import send_task + >>> result = send_task("celery.ping", args=[], kwargs={}) + >>> result.get() + 'pong' + +* `camqadm`: This is a new utility for command-line access to the AMQP API. + + Excellent for deleting queues/bindings/exchanges, experimentation and + testing: + + .. code-block:: bash + + $ camqadm + 1> help + + Gives an interactive shell, type `help` for a list of commands. + + When using Django, use the management command instead: + + .. code-block:: bash + + $ python manage.py camqadm + 1> help + +* Redis result backend: To conform to recent Redis API changes, the following + settings has been deprecated: + + * `REDIS_TIMEOUT` + * `REDIS_CONNECT_RETRY` + + These will emit a `DeprecationWarning` if used. + + A `REDIS_PASSWORD` setting has been added, so you can use the new + simple authentication mechanism in Redis. + +* The redis result backend no longer calls `SAVE` when disconnecting, + as this is apparently better handled by Redis itself. + +* If `settings.DEBUG` is on, the worker now warns about the possible + memory leak it can result in. + +* The ETA scheduler now sleeps at most two seconds between iterations. + +* The ETA scheduler now deletes any revoked tasks it might encounter. + + As revokes are not yet persistent, this is done to make sure the task + is revoked even though it's currently being hold because its eta is e.g. + a week into the future. + +* The `task_id` argument is now respected even if the task is executed + eagerly (either using apply, or :setting:`CELERY_ALWAYS_EAGER`). + +* The internal queues are now cleared if the connection is reset. + +* New magic keyword argument: `delivery_info`. + + Used by retry() to resend the task to its original destination using the same + exchange/routing_key. + +* Events: Fields was not passed by `.send()` (fixes the UUID key errors + in celerymon) + +* Added `--schedule`/`-s` option to the worker, so it is possible to + specify a custom schedule filename when using an embedded celerybeat + server (the `-B`/`--beat`) option. + +* Better Python 2.4 compatibility. The test suite now passes. 
+ +* task decorators: Now preserve docstring as `cls.__doc__`, (was previously + copied to `cls.run.__doc__`) + +* The `testproj` directory has been renamed to `tests` and we're now using + `nose` + `django-nose` for test discovery, and `unittest2` for test + cases. + +* New pip requirements files available in :file:`requirements`. + +* TaskPublisher: Declarations are now done once (per process). + +* Added `Task.delivery_mode` and the :setting:`CELERY_DEFAULT_DELIVERY_MODE` + setting. + + These can be used to mark messages non-persistent (i.e. so they are + lost if the broker is restarted). + +* Now have our own `ImproperlyConfigured` exception, instead of using the + Django one. + +* Improvements to the Debian init scripts: Shows an error if the program is + not executable. Does not modify `CELERYD` when using django with + virtualenv. + +.. _version-1.0.0: + +1.0.0 +===== +:release-date: 2010-02-10 04:00 P.M CET +:release-by: Ask Solem + +.. _v100-incompatible: + +Backward incompatible changes +----------------------------- + +* Celery does not support detaching anymore, so you have to use the tools + available on your platform, or something like Supervisord to make + celeryd/celerybeat/celerymon into background processes. + + We've had too many problems with the worker daemonizing itself, so it was + decided it has to be removed. Example startup scripts has been added to + the `extra/` directory: + + * Debian, Ubuntu, (start-stop-daemon) + + `extra/debian/init.d/celeryd` + `extra/debian/init.d/celerybeat` + + * Mac OS X launchd + + `extra/mac/org.celeryq.celeryd.plist` + `extra/mac/org.celeryq.celerybeat.plist` + `extra/mac/org.celeryq.celerymon.plist` + + * Supervisord (http://supervisord.org) + + `extra/supervisord/supervisord.conf` + + In addition to `--detach`, the following program arguments has been + removed: `--uid`, `--gid`, `--workdir`, `--chroot`, `--pidfile`, + `--umask`. All good daemonization tools should support equivalent + functionality, so don't worry. + + Also the following configuration keys has been removed: + `CELERYD_PID_FILE`, `CELERYBEAT_PID_FILE`, `CELERYMON_PID_FILE`. + +* Default worker loglevel is now `WARN`, to enable the previous log level + start the worker with `--loglevel=INFO`. + +* Tasks are automatically registered. + + This means you no longer have to register your tasks manually. + You don't have to change your old code right away, as it doesn't matter if + a task is registered twice. + + If you don't want your task to be automatically registered you can set + the `abstract` attribute + + .. code-block:: python + + class MyTask(Task): + abstract = True + + By using `abstract` only tasks subclassing this task will be automatically + registered (this works like the Django ORM). + + If you don't want subclasses to be registered either, you can set the + `autoregister` attribute to `False`. + + Incidentally, this change also fixes the problems with automatic name + assignment and relative imports. So you also don't have to specify a task name + anymore if you use relative imports. + +* You can no longer use regular functions as tasks. + + This change was added + because it makes the internals a lot more clean and simple. However, you can + now turn functions into tasks by using the `@task` decorator: + + .. code-block:: python + + from celery.decorators import task + + @task() + def add(x, y): + return x + y + + .. seealso:: + + :ref:`guide-tasks` for more information about the task decorators. 
+ +* The periodic task system has been rewritten to a centralized solution. + + This means the worker no longer schedules periodic tasks by default, + but a new daemon has been introduced: `celerybeat`. + + To launch the periodic task scheduler you have to run celerybeat: + + .. code-block:: bash + + $ celerybeat + + Make sure this is running on one server only, if you run it twice, all + periodic tasks will also be executed twice. + + If you only have one worker server you can embed it into the worker like this: + + .. code-block:: bash + + $ celeryd --beat # Embed celerybeat in celeryd. + +* The supervisor has been removed. + + This means the `-S` and `--supervised` options to `celeryd` is + no longer supported. Please use something like http://supervisord.org + instead. + +* `TaskSet.join` has been removed, use `TaskSetResult.join` instead. + +* The task status `"DONE"` has been renamed to `"SUCCESS"`. + +* `AsyncResult.is_done` has been removed, use `AsyncResult.successful` + instead. + +* The worker no longer stores errors if `Task.ignore_result` is set, to + revert to the previous behaviour set + :setting:`CELERY_STORE_ERRORS_EVEN_IF_IGNORED` to `True`. + +* The statistics functionality has been removed in favor of events, + so the `-S` and --statistics` switches has been removed. + +* The module `celery.task.strategy` has been removed. + +* `celery.discovery` has been removed, and it's `autodiscover` function is + now in `celery.loaders.djangoapp`. Reason: Internal API. + +* The :envvar:`CELERY_LOADER` environment variable now needs loader class name + in addition to module name, + + E.g. where you previously had: `"celery.loaders.default"`, you now need + `"celery.loaders.default.Loader"`, using the previous syntax will result + in a `DeprecationWarning`. + +* Detecting the loader is now lazy, and so is not done when importing + `celery.loaders`. + + To make this happen `celery.loaders.settings` has + been renamed to `load_settings` and is now a function returning the + settings object. `celery.loaders.current_loader` is now also + a function, returning the current loader. + + So:: + + loader = current_loader + + needs to be changed to:: + + loader = current_loader() + +.. _v100-deprecations: + +Deprecations +------------ + +* The following configuration variables has been renamed and will be + deprecated in v2.0: + + * CELERYD_DAEMON_LOG_FORMAT -> CELERYD_LOG_FORMAT + * CELERYD_DAEMON_LOG_LEVEL -> CELERYD_LOG_LEVEL + * CELERY_AMQP_CONNECTION_TIMEOUT -> CELERY_BROKER_CONNECTION_TIMEOUT + * CELERY_AMQP_CONNECTION_RETRY -> CELERY_BROKER_CONNECTION_RETRY + * CELERY_AMQP_CONNECTION_MAX_RETRIES -> CELERY_BROKER_CONNECTION_MAX_RETRIES + * SEND_CELERY_TASK_ERROR_EMAILS -> CELERY_SEND_TASK_ERROR_EMAILS + +* The public API names in celery.conf has also changed to a consistent naming + scheme. + +* We now support consuming from an arbitrary number of queues. + + To do this we had to rename the configuration syntax. If you use any of + the custom AMQP routing options (queue/exchange/routing_key, etc.), you + should read the new FAQ entry: :ref:`faq-task-routing`. + + The previous syntax is deprecated and scheduled for removal in v2.0. + +* `TaskSet.run` has been renamed to `TaskSet.apply_async`. + + `TaskSet.run` has now been deprecated, and is scheduled for + removal in v2.0. + +.. v100-news: + +News +---- + +* Rate limiting support (per task type, or globally). + +* New periodic task system. + +* Automatic registration. + +* New cool task decorator syntax. 
+ +* worker: now sends events if enabled with the `-E` argument. + + Excellent for monitoring tools, one is already in the making + (http://github.com/celery/celerymon). + + Current events include: :event:`worker-heartbeat`, + task-[received/succeeded/failed/retried], + :event:`worker-online`, :event:`worker-offline`. + +* You can now delete (revoke) tasks that has already been applied. + +* You can now set the hostname the worker identifies as using the `--hostname` + argument. + +* Cache backend now respects the :setting:`CELERY_TASK_RESULT_EXPIRES` setting. + +* Message format has been standardized and now uses ISO-8601 format + for dates instead of datetime. + +* worker now responds to the :sig:`SIGHUP` signal by restarting itself. + +* Periodic tasks are now scheduled on the clock. + + I.e. `timedelta(hours=1)` means every hour at :00 minutes, not every + hour from the server starts. To revert to the previous behaviour you + can set `PeriodicTask.relative = True`. + +* Now supports passing execute options to a TaskSets list of args, e.g.: + + >>> ts = TaskSet(add, [([2, 2], {}, {"countdown": 1}), + ... ([4, 4], {}, {"countdown": 2}), + ... ([8, 8], {}, {"countdown": 3})]) + >>> ts.run() + +* Got a 3x performance gain by setting the prefetch count to four times the + concurrency, (from an average task round-trip of 0.1s to 0.03s!). + + A new setting has been added: :setting:`CELERYD_PREFETCH_MULTIPLIER`, which + is set to `4` by default. + +* Improved support for webhook tasks. + + `celery.task.rest` is now deprecated, replaced with the new and shiny + :mod:`celery.task.http`. With more reflective names, sensible interface, + and it's possible to override the methods used to perform HTTP requests. + +* The results of task sets are now cached by storing it in the result + backend. + +.. _v100-changes: + +Changes +------- + +* Now depends on carrot >= 0.8.1 + +* New dependencies: billiard, python-dateutil, django-picklefield + +* No longer depends on python-daemon + +* The `uuid` distribution is added as a dependency when running Python 2.4. + +* Now remembers the previously detected loader by keeping it in + the :envvar:`CELERY_LOADER` environment variable. + + This may help on windows where fork emulation is used. + +* ETA no longer sends datetime objects, but uses ISO 8601 date format in a + string for better compatibility with other platforms. + +* No longer sends error mails for retried tasks. + +* Task can now override the backend used to store results. + +* Refactored the ExecuteWrapper, `apply` and :setting:`CELERY_ALWAYS_EAGER` + now also executes the task callbacks and signals. + +* Now using a proper scheduler for the tasks with an ETA. + + This means waiting eta tasks are sorted by time, so we don't have + to poll the whole list all the time. + +* Now also imports modules listed in :setting:`CELERY_IMPORTS` when running + with django (as documented). + +* Log level for stdout/stderr changed from INFO to ERROR + +* ImportErrors are now properly propagated when autodiscovering tasks. + +* You can now use `celery.messaging.establish_connection` to establish a + connection to the broker. + +* When running as a separate service the periodic task scheduler does some + smart moves to not poll too regularly. + + If you need faster poll times you can lower the value + of :setting:`CELERYBEAT_MAX_LOOP_INTERVAL`. + +* You can now change periodic task intervals at runtime, by making + `run_every` a property, or subclassing `PeriodicTask.is_due`. 
+ +* The worker now supports control commands enabled through the use of a + broadcast queue, you can remotely revoke tasks or set the rate limit for + a task type. See :mod:`celery.task.control`. + +* The services now sets informative process names (as shown in `ps` + listings) if the :mod:`setproctitle` module is installed. + +* :exc:`~@NotRegistered` now inherits from :exc:`KeyError`, + and `TaskRegistry.__getitem__`+`pop` raises `NotRegistered` instead + +* You can set the loader via the :envvar:`CELERY_LOADER` environment variable. + +* You can now set :setting:`CELERY_IGNORE_RESULT` to ignore task results by + default (if enabled, tasks doesn't save results or errors to the backend used). + +* The worker now correctly handles malformed messages by throwing away and + acknowledging the message, instead of crashing. + +.. _v100-bugs: + +Bugs +---- + +* Fixed a race condition that could happen while storing task results in the + database. + +.. _v100-documentation: + +Documentation +------------- + +* Reference now split into two sections; API reference and internal module + reference. + +.. _version-0.8.4: + +0.8.4 +===== +:release-date: 2010-02-05 01:52 P.M CEST +:release-by: Ask Solem + +* Now emits a warning if the --detach argument is used. + --detach should not be used anymore, as it has several not easily fixed + bugs related to it. Instead, use something like start-stop-daemon, + Supervisord or launchd (os x). + + +* Make sure logger class is process aware, even if running Python >= 2.6. + + +* Error emails are not sent anymore when the task is retried. + +.. _version-0.8.3: + +0.8.3 +===== +:release-date: 2009-12-22 09:43 A.M CEST +:release-by: Ask Solem + +* Fixed a possible race condition that could happen when storing/querying + task results using the database backend. + +* Now has console script entry points in the setup.py file, so tools like + Buildout will correctly install the programs celeryd and celeryinit. + +.. _version-0.8.2: + +0.8.2 +===== +:release-date: 2009-11-20 03:40 P.M CEST +:release-by: Ask Solem + +* QOS Prefetch count was not applied properly, as it was set for every message + received (which apparently behaves like, "receive one more"), instead of only + set when our wanted value changed. + +.. _version-0.8.1: + +0.8.1 +================================= +:release-date: 2009-11-16 05:21 P.M CEST +:release-by: Ask Solem + +.. _v081-very-important: + +Very important note +------------------- + +This release (with carrot 0.8.0) enables AMQP QoS (quality of service), which +means the workers will only receive as many messages as it can handle at a +time. As with any release, you should test this version upgrade on your +development servers before rolling it out to production! + +.. _v081-important: + +Important changes +----------------- + +* If you're using Python < 2.6 and you use the multiprocessing backport, then + multiprocessing version 2.6.2.1 is required. 
+ +* All AMQP_* settings has been renamed to BROKER_*, and in addition + AMQP_SERVER has been renamed to BROKER_HOST, so before where you had:: + + AMQP_SERVER = "localhost" + AMQP_PORT = 5678 + AMQP_USER = "myuser" + AMQP_PASSWORD = "mypassword" + AMQP_VHOST = "celery" + + You need to change that to:: + + BROKER_HOST = "localhost" + BROKER_PORT = 5678 + BROKER_USER = "myuser" + BROKER_PASSWORD = "mypassword" + BROKER_VHOST = "celery" + +* Custom carrot backends now need to include the backend class name, so before + where you had:: + + CARROT_BACKEND = "mycustom.backend.module" + + you need to change it to:: + + CARROT_BACKEND = "mycustom.backend.module.Backend" + + where `Backend` is the class name. This is probably `"Backend"`, as + that was the previously implied name. + +* New version requirement for carrot: 0.8.0 + +.. _v081-changes: + +Changes +------- + +* Incorporated the multiprocessing backport patch that fixes the + `processName` error. + +* Ignore the result of PeriodicTask's by default. + +* Added a Redis result store backend + +* Allow /etc/default/celeryd to define additional options for the celeryd init + script. + +* MongoDB periodic tasks issue when using different time than UTC fixed. + +* Windows specific: Negate test for available os.fork (thanks miracle2k) + +* Now tried to handle broken PID files. + +* Added a Django test runner to contrib that sets + `CELERY_ALWAYS_EAGER = True` for testing with the database backend. + +* Added a :setting:`CELERY_CACHE_BACKEND` setting for using something other + than the django-global cache backend. + +* Use custom implementation of functools.partial (curry) for Python 2.4 support + (Probably still problems with running on 2.4, but it will eventually be + supported) + +* Prepare exception to pickle when saving :state:`RETRY` status for all backends. + +* SQLite no concurrency limit should only be effective if the database backend + is used. + + +.. _version-0.8.0: + +0.8.0 +===== +:release-date: 2009-09-22 03:06 P.M CEST +:release-by: Ask Solem + +.. _v080-incompatible: + +Backward incompatible changes +----------------------------- + +* Add traceback to result value on failure. + + .. note:: + + If you use the database backend you have to re-create the + database table `celery_taskmeta`. + + Contact the :ref:`mailing-list` or :ref:`irc-channel` channel + for help doing this. + +* Database tables are now only created if the database backend is used, + so if you change back to the database backend at some point, + be sure to initialize tables (django: `syncdb`, python: `celeryinit`). + + .. note:: + + This is only applies if using Django version 1.1 or higher. + +* Now depends on `carrot` version 0.6.0. + +* Now depends on python-daemon 1.4.8 + +.. _v080-important: + +Important changes +----------------- + +* Celery can now be used in pure Python (outside of a Django project). + + This means celery is no longer Django specific. + + For more information see the FAQ entry + :ref:`faq-is-celery-for-django-only`. + +* Celery now supports task retries. + + See :ref:`task-retry` for more information. + +* We now have an AMQP result store backend. + + It uses messages to publish task return value and status. And it's + incredibly fast! + + See issue #6 for more info! + +* AMQP QoS (prefetch count) implemented: + + This to not receive more messages than we can handle. + +* Now redirects stdout/stderr to the workers log file when detached + +* Now uses `inspect.getargspec` to only pass default arguments + the task supports. 
+ +* Add Task.on_success, .on_retry, .on_failure handlers + See :meth:`celery.task.base.Task.on_success`, + :meth:`celery.task.base.Task.on_retry`, + :meth:`celery.task.base.Task.on_failure`, + +* `celery.utils.gen_unique_id`: Workaround for + http://bugs.python.org/issue4607 + +* You can now customize what happens at worker start, at process init, etc., + by creating your own loaders. (see :mod:`celery.loaders.default`, + :mod:`celery.loaders.djangoapp`, :mod:`celery.loaders`.) + +* Support for multiple AMQP exchanges and queues. + + This feature misses documentation and tests, so anyone interested + is encouraged to improve this situation. + +* The worker now survives a restart of the AMQP server! + + Automatically re-establish AMQP broker connection if it's lost. + + New settings: + + * AMQP_CONNECTION_RETRY + Set to `True` to enable connection retries. + + * AMQP_CONNECTION_MAX_RETRIES. + Maximum number of restarts before we give up. Default: `100`. + +.. _v080-news: + +News +---- + +* Fix an incompatibility between python-daemon and multiprocessing, + which resulted in the `[Errno 10] No child processes` problem when + detaching. + +* Fixed a possible DjangoUnicodeDecodeError being raised when saving pickled + data to Django`s memcached cache backend. + +* Better Windows compatibility. + +* New version of the pickled field (taken from + http://www.djangosnippets.org/snippets/513/) + +* New signals introduced: `task_sent`, `task_prerun` and + `task_postrun`, see :mod:`celery.signals` for more information. + +* `TaskSetResult.join` caused `TypeError` when `timeout=None`. + Thanks Jerzy Kozera. Closes #31 + +* `views.apply` should return `HttpResponse` instance. + Thanks to Jerzy Kozera. Closes #32 + +* `PeriodicTask`: Save conversion of `run_every` from `int` + to `timedelta` to the class attribute instead of on the instance. + +* Exceptions has been moved to `celery.exceptions`, but are still + available in the previous module. + +* Try to rollback transaction and retry saving result if an error happens + while setting task status with the database backend. + +* jail() refactored into :class:`celery.execute.ExecuteWrapper`. + +* `views.apply` now correctly sets mime-type to "application/json" + +* `views.task_status` now returns exception if state is :state:`RETRY` + +* `views.task_status` now returns traceback if state is :state:`FAILURE` + or :state:`RETRY` + +* Documented default task arguments. + +* Add a sensible __repr__ to ExceptionInfo for easier debugging + +* Fix documentation typo `.. import map` -> `.. import dmap`. + Thanks to mikedizon + +.. _version-0.6.0: + +0.6.0 +===== +:release-date: 2009-08-07 06:54 A.M CET +:release-by: Ask Solem + +.. _v060-important: + +Important changes +----------------- + +* Fixed a bug where tasks raising unpickleable exceptions crashed pool + workers. So if you've had pool workers mysteriously disappearing, or + problems with the worker stopping working, this has been fixed in this + version. + +* Fixed a race condition with periodic tasks. + +* The task pool is now supervised, so if a pool worker crashes, + goes away or stops responding, it is automatically replaced with + a new one. + +* Task.name is now automatically generated out of class module+name, e.g. + `"djangotwitter.tasks.UpdateStatusesTask"`. Very convenient. No idea why + we didn't do this before. Some documentation is updated to not manually + specify a task name. + +.. 
_v060-news: + +News +---- + +* Tested with Django 1.1 + +* New Tutorial: Creating a click counter using carrot and celery + +* Database entries for periodic tasks are now created at the workers + startup instead of for each check (which has been a forgotten TODO/XXX + in the code for a long time) + +* New settings variable: :setting:`CELERY_TASK_RESULT_EXPIRES` + Time (in seconds, or a `datetime.timedelta` object) for when after + stored task results are deleted. For the moment this only works for the + database backend. + +* The worker now emits a debug log message for which periodic tasks + has been launched. + +* The periodic task table is now locked for reading while getting + periodic task status. (MySQL only so far, seeking patches for other + engines) + +* A lot more debugging information is now available by turning on the + `DEBUG` log level (`--loglevel=DEBUG`). + +* Functions/methods with a timeout argument now works correctly. + +* New: `celery.strategy.even_time_distribution`: + With an iterator yielding task args, kwargs tuples, evenly distribute + the processing of its tasks throughout the time window available. + +* Log message `Unknown task ignored...` now has log level `ERROR` + +* Log message when task is received is now emitted for all tasks, even if + the task has an ETA (estimated time of arrival). Also the log message now + includes the ETA for the task (if any). + +* Acknowledgement now happens in the pool callback. Can't do ack in the job + target, as it's not pickleable (can't share AMQP connection, etc.)). + +* Added note about .delay hanging in README + +* Tests now passing in Django 1.1 + +* Fixed discovery to make sure app is in INSTALLED_APPS + +* Previously overridden pool behavior (process reap, wait until pool worker + available, etc.) is now handled by `multiprocessing.Pool` itself. + +* Convert statistics data to Unicode for use as kwargs. Thanks Lucy! + +.. _version-0.4.1: + +0.4.1 +===== +:release-date: 2009-07-02 01:42 P.M CET +:release-by: Ask Solem + +* Fixed a bug with parsing the message options (`mandatory`, + `routing_key`, `priority`, `immediate`) + +.. _version-0.4.0: + +0.4.0 +===== +:release-date: 2009-07-01 07:29 P.M CET +:release-by: Ask Solem + +* Adds eager execution. `celery.execute.apply`|`Task.apply` executes the + function blocking until the task is done, for API compatibility it + returns an `celery.result.EagerResult` instance. You can configure + celery to always run tasks locally by setting the + :setting:`CELERY_ALWAYS_EAGER` setting to `True`. + +* Now depends on `anyjson`. + +* 99% coverage using python `coverage` 3.0. + +.. _version-0.3.20: + +0.3.20 +====== +:release-date: 2009-06-25 08:42 P.M CET +:release-by: Ask Solem + +* New arguments to `apply_async` (the advanced version of + `delay_task`), `countdown` and `eta`; + + >>> # Run 10 seconds into the future. + >>> res = apply_async(MyTask, countdown=10); + + >>> # Run 1 day from now + >>> res = apply_async(MyTask, + ... eta=datetime.now() + timedelta(days=1)) + +* Now unlinks stale PID files + +* Lots of more tests. + +* Now compatible with carrot >= 0.5.0. + +* **IMPORTANT** The `subtask_ids` attribute on the `TaskSetResult` + instance has been removed. To get this information instead use: + + >>> subtask_ids = [subtask.id for subtask in ts_res.subtasks] + +* `Taskset.run()` now respects extra message options from the task class. + +* Task: Add attribute `ignore_result`: Don't store the status and + return value. 
This means you can't use the + `celery.result.AsyncResult` to check if the task is + done, or get its return value. Only use if you need the performance + and is able live without these features. Any exceptions raised will + store the return value/status as usual. + +* Task: Add attribute `disable_error_emails` to disable sending error + emails for that task. + +* Should now work on Windows (although running in the background won't + work, so using the `--detach` argument results in an exception + being raised.) + +* Added support for statistics for profiling and monitoring. + To start sending statistics start the worker with the + `--statistics option. Then after a while you can dump the results + by running `python manage.py celerystats`. See + `celery.monitoring` for more information. + +* The celery daemon can now be supervised (i.e. it is automatically + restarted if it crashes). To use this start the worker with the + --supervised` option (or alternatively `-S`). + +* views.apply: View calling a task. Example + + :: + + http://e.com/celery/apply/task_name/arg1/arg2//?kwarg1=a&kwarg2=b + + + .. warning:: + + Use with caution! Do not expose this URL to the public + without first ensuring that your code is safe! + +* Refactored `celery.task`. It's now split into three modules: + + * celery.task + + Contains `apply_async`, `delay_task`, `discard_all`, and task + shortcuts, plus imports objects from `celery.task.base` and + `celery.task.builtins` + + * celery.task.base + + Contains task base classes: `Task`, `PeriodicTask`, + `TaskSet`, `AsynchronousMapTask`, `ExecuteRemoteTask`. + + * celery.task.builtins + + Built-in tasks: `PingTask`, `DeleteExpiredTaskMetaTask`. + +.. _version-0.3.7: + +0.3.7 +===== +:release-date: 2008-06-16 11:41 P.M CET +:release-by: Ask Solem + +* **IMPORTANT** Now uses AMQP`s `basic.consume` instead of + `basic.get`. This means we're no longer polling the broker for + new messages. + +* **IMPORTANT** Default concurrency limit is now set to the number of CPUs + available on the system. + +* **IMPORTANT** `tasks.register`: Renamed `task_name` argument to + `name`, so + + >>> tasks.register(func, task_name="mytask") + + has to be replaced with: + + >>> tasks.register(func, name="mytask") + +* The daemon now correctly runs if the pidlock is stale. + +* Now compatible with carrot 0.4.5 + +* Default AMQP connection timeout is now 4 seconds. +* `AsyncResult.read()` was always returning `True`. + +* Only use README as long_description if the file exists so easy_install + doesn't break. + +* `celery.view`: JSON responses now properly set its mime-type. + +* `apply_async` now has a `connection` keyword argument so you + can re-use the same AMQP connection if you want to execute + more than one task. + +* Handle failures in task_status view such that it won't throw 500s. + +* Fixed typo `AMQP_SERVER` in documentation to `AMQP_HOST`. + +* Worker exception emails sent to administrators now works properly. + +* No longer depends on `django`, so installing `celery` won't affect + the preferred Django version installed. + +* Now works with PostgreSQL (psycopg2) again by registering the + `PickledObject` field. + +* Worker: Added `--detach` option as an alias to `--daemon`, and + it's the term used in the documentation from now on. + +* Make sure the pool and periodic task worker thread is terminated + properly at exit. (So `Ctrl-C` works again). + +* Now depends on `python-daemon`. 
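+
+The `connection` keyword argument to `apply_async` mentioned above can be
+used to publish several tasks over a single broker connection.  A hedged
+sketch, using carrot's `BrokerConnection` (as used elsewhere in these
+notes) and a hypothetical `EchoTask`:
+
+.. code-block:: python
+
+    from carrot.connection import BrokerConnection
+
+    from celery.task import apply_async
+    from celery.task.base import Task
+
+    class EchoTask(Task):
+        name = "myapp.echo"          # hypothetical task, named explicitly
+
+        def run(self, x, **kwargs):
+            return x
+
+    connection = BrokerConnection("localhost", "guest", "guest", "/")
+    try:
+        # Reuse the same AMQP connection for every message published.
+        results = [apply_async(EchoTask, args=[i], connection=connection)
+                   for i in range(10)]
+    finally:
+        connection.close()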
+ +* Removed dependency to `simplejson` + +* Cache Backend: Re-establishes connection for every task process + if the Django cache backend is memcached/libmemcached. + +* Tyrant Backend: Now re-establishes the connection for every task + executed. + +.. _version-0.3.3: + +0.3.3 +===== +:release-date: 2009-06-08 01:07 P.M CET +:release-by: Ask Solem + +* The `PeriodicWorkController` now sleeps for 1 second between checking + for periodic tasks to execute. + +.. _version-0.3.2: + +0.3.2 +===== +:release-date: 2009-06-08 01:07 P.M CET +:release-by: Ask Solem + +* worker: Added option `--discard`: Discard (delete!) all waiting + messages in the queue. + +* Worker: The `--wakeup-after` option was not handled as a float. + +.. _version-0.3.1: + +0.3.1 +===== +:release-date: 2009-06-08 01:07 P.M CET +:release-by: Ask Solem + +* The `PeriodicTask` worker is now running in its own thread instead + of blocking the `TaskController` loop. + +* Default `QUEUE_WAKEUP_AFTER` has been lowered to `0.1` (was `0.3`) + +.. _version-0.3.0: + +0.3.0 +===== +:release-date: 2009-06-08 12:41 P.M CET +:release-by: Ask Solem + +.. warning:: + + This is a development version, for the stable release, please + see versions 0.2.x. + +**VERY IMPORTANT:** Pickle is now the encoder used for serializing task +arguments, so be sure to flush your task queue before you upgrade. + +* **IMPORTANT** TaskSet.run() now returns a celery.result.TaskSetResult + instance, which lets you inspect the status and return values of a + taskset as it was a single entity. + +* **IMPORTANT** Celery now depends on carrot >= 0.4.1. + +* The celery daemon now sends task errors to the registered admin emails. + To turn off this feature, set `SEND_CELERY_TASK_ERROR_EMAILS` to + `False` in your `settings.py`. Thanks to Grégoire Cachet. + +* You can now run the celery daemon by using `manage.py`: + + .. code-block:: bash + + $ python manage.py celeryd + + Thanks to Grégoire Cachet. + +* Added support for message priorities, topic exchanges, custom routing + keys for tasks. This means we have introduced + `celery.task.apply_async`, a new way of executing tasks. + + You can use `celery.task.delay` and `celery.Task.delay` like usual, but + if you want greater control over the message sent, you want + `celery.task.apply_async` and `celery.Task.apply_async`. + + This also means the AMQP configuration has changed. Some settings has + been renamed, while others are new:: + + CELERY_AMQP_EXCHANGE + CELERY_AMQP_PUBLISHER_ROUTING_KEY + CELERY_AMQP_CONSUMER_ROUTING_KEY + CELERY_AMQP_CONSUMER_QUEUE + CELERY_AMQP_EXCHANGE_TYPE + + See the entry :ref:`faq-task-routing` in the + :ref:`FAQ ` for more information. + +* Task errors are now logged using log level `ERROR` instead of `INFO`, + and stacktraces are dumped. Thanks to Grégoire Cachet. + +* Make every new worker process re-establish it's Django DB connection, + this solving the "MySQL connection died?" exceptions. + Thanks to Vitaly Babiy and Jirka Vejrazka. + +* **IMPORTANT** Now using pickle to encode task arguments. This means you + now can pass complex python objects to tasks as arguments. + +* Removed dependency to `yadayada`. + +* Added a FAQ, see `docs/faq.rst`. + +* Now converts any Unicode keys in task `kwargs` to regular strings. + Thanks Vitaly Babiy. + +* Renamed the `TaskDaemon` to `WorkController`. + +* `celery.datastructures.TaskProcessQueue` is now renamed to + `celery.pool.TaskPool`. + +* The pool algorithm has been refactored for greater performance and + stability. + +.. 
_version-0.2.0: + +0.2.0 +===== +:release-date: 2009-05-20 05:14 P.M CET +:release-by: Ask Solem + +* Final release of 0.2.0 + +* Compatible with carrot version 0.4.0. + +* Fixes some syntax errors related to fetching results + from the database backend. + +.. _version-0.2.0-pre3: + +0.2.0-pre3 +========== +:release-date: 2009-05-20 05:14 P.M CET +:release-by: Ask Solem + +* *Internal release*. Improved handling of unpickleable exceptions, + `get_result` now tries to recreate something looking like the + original exception. + +.. _version-0.2.0-pre2: + +0.2.0-pre2 +========== +:release-date: 2009-05-20 01:56 P.M CET +:release-by: Ask Solem + +* Now handles unpickleable exceptions (like the dynamically generated + subclasses of `django.core.exception.MultipleObjectsReturned`). + +.. _version-0.2.0-pre1: + +0.2.0-pre1 +========== +:release-date: 2009-05-20 12:33 P.M CET +:release-by: Ask Solem + +* It's getting quite stable, with a lot of new features, so bump + version to 0.2. This is a pre-release. + +* `celery.task.mark_as_read()` and `celery.task.mark_as_failure()` has + been removed. Use `celery.backends.default_backend.mark_as_read()`, + and `celery.backends.default_backend.mark_as_failure()` instead. + +.. _version-0.1.15: + +0.1.15 +====== +:release-date: 2009-05-19 04:13 P.M CET +:release-by: Ask Solem + +* The celery daemon was leaking AMQP connections, this should be fixed, + if you have any problems with too many files open (like `emfile` + errors in `rabbit.log`, please contact us! + +.. _version-0.1.14: + +0.1.14 +====== +:release-date: 2009-05-19 01:08 P.M CET +:release-by: Ask Solem + +* Fixed a syntax error in the `TaskSet` class. (No such variable + `TimeOutError`). + +.. _version-0.1.13: + +0.1.13 +====== +:release-date: 2009-05-19 12:36 P.M CET +:release-by: Ask Solem + +* Forgot to add `yadayada` to install requirements. + +* Now deletes all expired task results, not just those marked as done. + +* Able to load the Tokyo Tyrant backend class without django + configuration, can specify tyrant settings directly in the class + constructor. + +* Improved API documentation + +* Now using the Sphinx documentation system, you can build + the html documentation by doing: + + .. code-block:: bash + + $ cd docs + $ make html + + and the result will be in `docs/.build/html`. + +.. _version-0.1.12: + +0.1.12 +====== +:release-date: 2009-05-18 04:38 P.M CET +:release-by: Ask Solem + +* `delay_task()` etc. now returns `celery.task.AsyncResult` object, + which lets you check the result and any failure that might have + happened. It kind of works like the `multiprocessing.AsyncResult` + class returned by `multiprocessing.Pool.map_async`. + +* Added dmap() and dmap_async(). This works like the + `multiprocessing.Pool` versions except they are tasks + distributed to the celery server. Example: + + >>> from celery.task import dmap + >>> import operator + >>> dmap(operator.add, [[2, 2], [4, 4], [8, 8]]) + >>> [4, 8, 16] + + >>> from celery.task import dmap_async + >>> import operator + >>> result = dmap_async(operator.add, [[2, 2], [4, 4], [8, 8]]) + >>> result.ready() + False + >>> time.sleep(1) + >>> result.ready() + True + >>> result.result + [4, 8, 16] + +* Refactored the task metadata cache and database backends, and added + a new backend for Tokyo Tyrant. You can set the backend in your django + settings file. 
E.g.:: + + CELERY_RESULT_BACKEND = "database"; # Uses the database + CELERY_RESULT_BACKEND = "cache"; # Uses the django cache framework + CELERY_RESULT_BACKEND = "tyrant"; # Uses Tokyo Tyrant + TT_HOST = "localhost"; # Hostname for the Tokyo Tyrant server. + TT_PORT = 6657; # Port of the Tokyo Tyrant server. + +.. _version-0.1.11: + +0.1.11 +====== +:release-date: 2009-05-12 02:08 P.M CET +:release-by: Ask Solem + +* The logging system was leaking file descriptors, resulting in + servers stopping with the EMFILES (too many open files) error. (fixed) + +.. _version-0.1.10: + +0.1.10 +====== +:release-date: 2009-05-11 12:46 P.M CET +:release-by: Ask Solem + +* Tasks now supports both positional arguments and keyword arguments. + +* Requires carrot 0.3.8. + +* The daemon now tries to reconnect if the connection is lost. + +.. _version-0.1.8: + +0.1.8 +===== +:release-date: 2009-05-07 12:27 P.M CET +:release-by: Ask Solem + +* Better test coverage +* More documentation +* The worker doesn't emit `Queue is empty` message if + `settings.CELERYD_EMPTY_MSG_EMIT_EVERY` is 0. + +.. _version-0.1.7: + +0.1.7 +===== +:release-date: 2009-04-30 01:50 P.M CET +:release-by: Ask Solem + +* Added some unit tests + +* Can now use the database for task metadata (like if the task has + been executed or not). Set `settings.CELERY_TASK_META` + +* Can now run `python setup.py test` to run the unit tests from + within the `tests` project. + +* Can set the AMQP exchange/routing key/queue using + `settings.CELERY_AMQP_EXCHANGE`, `settings.CELERY_AMQP_ROUTING_KEY`, + and `settings.CELERY_AMQP_CONSUMER_QUEUE`. + +.. _version-0.1.6: + +0.1.6 +===== +:release-date: 2009-04-28 02:13 P.M CET +:release-by: Ask Solem + +* Introducing `TaskSet`. A set of subtasks is executed and you can + find out how many, or if all them, are done (excellent for progress + bars and such) + +* Now catches all exceptions when running `Task.__call__`, so the + daemon doesn't die. This doesn't happen for pure functions yet, only + `Task` classes. + +* `autodiscover()` now works with zipped eggs. + +* Worker: Now adds current working directory to `sys.path` for + convenience. + +* The `run_every` attribute of `PeriodicTask` classes can now be a + `datetime.timedelta()` object. + +* Worker: You can now set the `DJANGO_PROJECT_DIR` variable + for the worker and it will add that to `sys.path` for easy launching. + +* Can now check if a task has been executed or not via HTTP. + +* You can do this by including the celery `urls.py` into your project, + + >>> url(r'^celery/$', include("celery.urls")) + + then visiting the following url,:: + + http://mysite/celery/$task_id/done/ + + this will return a JSON dictionary like e.g: + + >>> {"task": {"id": $task_id, "executed": true}} + +* `delay_task` now returns string id, not `uuid.UUID` instance. + +* Now has `PeriodicTasks`, to have `cron` like functionality. + +* Project changed name from `crunchy` to `celery`. The details of + the name change request is in `docs/name_change_request.txt`. + +.. _version-0.1.0: + +0.1.0 +===== +:release-date: 2009-04-24 11:28 A.M CET +:release-by: Ask Solem + +* Initial release diff --git a/docs/history/changelog-2.0.rst b/docs/history/changelog-2.0.rst new file mode 100644 index 0000000..93f7d5a --- /dev/null +++ b/docs/history/changelog-2.0.rst @@ -0,0 +1,1010 @@ +.. _changelog-2.0: + +=============================== + Change history for Celery 2.0 +=============================== + +.. contents:: + :local: + +.. 
_version-2.0.3: + +2.0.3 +===== +:release-date: 2010-08-27 12:00 P.M CEST +:release-by: Ask Solem + +.. _v203-fixes: + +Fixes +----- + +* Worker: Properly handle connection errors happening while + closing consumers. + +* Worker: Events are now buffered if the connection is down, + then sent when the connection is re-established. + +* No longer depends on the :mod:`mailer` package. + + This package had a name space collision with `django-mailer`, + so its functionality was replaced. + +* Redis result backend: Documentation typos: Redis doesn't have + database names, but database numbers. The default database is now 0. + +* :class:`~celery.task.control.inspect`: + `registered_tasks` was requesting an invalid command because of a typo. + + See issue #170. + +* :setting:`CELERY_ROUTES`: Values defined in the route should now have + precedence over values defined in :setting:`CELERY_QUEUES` when merging + the two. + + With the follow settings:: + + CELERY_QUEUES = {"cpubound": {"exchange": "cpubound", + "routing_key": "cpubound"}} + + CELERY_ROUTES = {"tasks.add": {"queue": "cpubound", + "routing_key": "tasks.add", + "serializer": "json"}} + + The final routing options for `tasks.add` will become:: + + {"exchange": "cpubound", + "routing_key": "tasks.add", + "serializer": "json"} + + This was not the case before: the values + in :setting:`CELERY_QUEUES` would take precedence. + +* Worker crashed if the value of :setting:`CELERY_TASK_ERROR_WHITELIST` was + not an iterable + +* :func:`~celery.execute.apply`: Make sure `kwargs["task_id"]` is + always set. + +* `AsyncResult.traceback`: Now returns :const:`None`, instead of raising + :exc:`KeyError` if traceback is missing. + +* :class:`~celery.task.control.inspect`: Replies did not work correctly + if no destination was specified. + +* Can now store result/metadata for custom states. + +* Worker: A warning is now emitted if the sending of task error + emails fails. + +* celeryev: Curses monitor no longer crashes if the terminal window + is resized. + + See issue #160. + +* Worker: On OS X it is not possible to run `os.exec*` in a process + that is threaded. + + This breaks the SIGHUP restart handler, + and is now disabled on OS X, emitting a warning instead. + + See issue #152. + +* :mod:`celery.execute.trace`: Properly handle `raise(str)`, + which is still allowed in Python 2.4. + + See issue #175. + +* Using urllib2 in a periodic task on OS X crashed because + of the proxy auto detection used in OS X. + + This is now fixed by using a workaround. + See issue #143. + +* Debian init scripts: Commands should not run in a sub shell + + See issue #163. + +* Debian init scripts: Use the absolute path of celeryd program to allow stat + + See issue #162. + +.. _v203-documentation: + +Documentation +------------- + +* getting-started/broker-installation: Fixed typo + + `set_permissions ""` -> `set_permissions ".*"`. + +* Tasks User Guide: Added section on database transactions. + + See issue #169. + +* Routing User Guide: Fixed typo `"feed": -> {"queue": "feeds"}`. + + See issue #169. + +* Documented the default values for the :setting:`CELERYD_CONCURRENCY` + and :setting:`CELERYD_PREFETCH_MULTIPLIER` settings. + +* Tasks User Guide: Fixed typos in the subtask example + +* celery.signals: Documented worker_process_init. + +* Daemonization cookbook: Need to export DJANGO_SETTINGS_MODULE in + `/etc/default/celeryd`. 
+ +* Added some more FAQs from stack overflow + +* Daemonization cookbook: Fixed typo `CELERYD_LOGFILE/CELERYD_PIDFILE` + + to `CELERYD_LOG_FILE` / `CELERYD_PID_FILE` + + Also added troubleshooting section for the init scripts. + +.. _version-2.0.2: + +2.0.2 +===== +:release-date: 2010-07-22 11:31 A.M CEST +:release-by: Ask Solem + +* Routes: When using the dict route syntax, the exchange for a task + could disappear making the task unroutable. + + See issue #158. + +* Test suite now passing on Python 2.4 + +* No longer have to type `PYTHONPATH=.` to use celeryconfig in the current + directory. + + This is accomplished by the default loader ensuring that the current + directory is in `sys.path` when loading the config module. + `sys.path` is reset to its original state after loading. + + Adding the current working directory to `sys.path` without the user + knowing may be a security issue, as this means someone can drop a Python module in the users + directory that executes arbitrary commands. This was the original reason + not to do this, but if done *only when loading the config module*, this + means that the behavior will only apply to the modules imported in the + config module, which I think is a good compromise (certainly better than + just explicitly setting `PYTHONPATH=.` anyway) + +* Experimental Cassandra backend added. + +* Worker: SIGHUP handler accidentally propagated to worker pool processes. + + In combination with 7a7c44e39344789f11b5346e9cc8340f5fe4846c + this would make each child process start a new worker instance when + the terminal window was closed :/ + +* Worker: Do not install SIGHUP handler if running from a terminal. + + This fixes the problem where the worker is launched in the background + when closing the terminal. + +* Worker: Now joins threads at shutdown. + + See issue #152. + +* Test tear down: Don't use `atexit` but nose's `teardown()` functionality + instead. + + See issue #154. + +* Debian worker init script: Stop now works correctly. + +* Task logger: `warn` method added (synonym for `warning`) + +* Can now define a white list of errors to send error emails for. + + Example:: + + CELERY_TASK_ERROR_WHITELIST = ('myapp.MalformedInputError') + + See issue #153. + +* Worker: Now handles overflow exceptions in `time.mktime` while parsing + the ETA field. + +* LoggerWrapper: Try to detect loggers logging back to stderr/stdout making + an infinite loop. + +* Added :class:`celery.task.control.inspect`: Inspects a running worker. + + Examples:: + + # Inspect a single worker + >>> i = inspect("myworker.example.com") + + # Inspect several workers + >>> i = inspect(["myworker.example.com", "myworker2.example.com"]) + + # Inspect all workers consuming on this vhost. + >>> i = inspect() + + ### Methods + + # Get currently executing tasks + >>> i.active() + + # Get currently reserved tasks + >>> i.reserved() + + # Get the current eta schedule + >>> i.scheduled() + + # Worker statistics and info + >>> i.stats() + + # List of currently revoked tasks + >>> i.revoked() + + # List of registered tasks + >>> i.registered_tasks() + +* Remote control commands `dump_active`/`dump_reserved`/`dump_schedule` + now replies with detailed task requests. + + Containing the original arguments and fields of the task requested. + + In addition the remote control command `set_loglevel` has been added, + this only changes the log level for the main process. + +* Worker control command execution now catches errors and returns their + string representation in the reply. 
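+
+  A hedged illustration of what such a reply can look like when a command
+  handler raises (the reply layout shown here is an assumption for
+  illustration, not verbatim worker output):
+
+  .. code-block:: python
+
+      >>> from celery.task.control import broadcast
+      >>> broadcast("rate_limit",
+      ...           arguments={"task_name": "tasks.add",
+      ...                      "rate_limit": "not a valid rate"},
+      ...           reply=True)
+      [{'worker.local': {'error': "ValueError: ..."}}]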
+ +* Functional test suite added + + :mod:`celery.tests.functional.case` contains utilities to start + and stop an embedded worker process, for use in functional testing. + +.. _version-2.0.1: + +2.0.1 +===== +:release-date: 2010-07-09 03:02 P.M CEST +:release-by: Ask Solem + +* multiprocessing.pool: Now handles encoding errors, so that pickling errors + doesn't crash the worker processes. + +* The remote control command replies was not working with RabbitMQ 1.8.0's + stricter equivalence checks. + + If you've already hit this problem you may have to delete the + declaration: + + .. code-block:: bash + + $ camqadm exchange.delete celerycrq + + or: + + .. code-block:: bash + + $ python manage.py camqadm exchange.delete celerycrq + +* A bug sneaked in the ETA scheduler that made it only able to execute + one task per second(!) + + The scheduler sleeps between iterations so it doesn't consume too much CPU. + It keeps a list of the scheduled items sorted by time, at each iteration + it sleeps for the remaining time of the item with the nearest deadline. + If there are no eta tasks it will sleep for a minimum amount of time, one + second by default. + + A bug sneaked in here, making it sleep for one second for every task + that was scheduled. This has been fixed, so now it should move + tasks like hot knife through butter. + + In addition a new setting has been added to control the minimum sleep + interval; :setting:`CELERYD_ETA_SCHEDULER_PRECISION`. A good + value for this would be a float between 0 and 1, depending + on the needed precision. A value of 0.8 means that when the ETA of a task + is met, it will take at most 0.8 seconds for the task to be moved to the + ready queue. + +* Pool: Supervisor did not release the semaphore. + + This would lead to a deadlock if all workers terminated prematurely. + +* Added Python version trove classifiers: 2.4, 2.5, 2.6 and 2.7 + +* Tests now passing on Python 2.7. + +* Task.__reduce__: Tasks created using the task decorator can now be pickled. + +* setup.py: nose added to `tests_require`. + +* Pickle should now work with SQLAlchemy 0.5.x + +* New homepage design by Jan Henrik Helmers: http://celeryproject.org + +* New Sphinx theme by Armin Ronacher: http://docs.celeryproject.org/ + +* Fixed "pending_xref" errors shown in the HTML rendering of the + documentation. Apparently this was caused by new changes in Sphinx 1.0b2. + +* Router classes in :setting:`CELERY_ROUTES` are now imported lazily. + + Importing a router class in a module that also loads the Celery + environment would cause a circular dependency. This is solved + by importing it when needed after the environment is set up. + +* :setting:`CELERY_ROUTES` was broken if set to a single dict. + + This example in the docs should now work again:: + + CELERY_ROUTES = {"feed.tasks.import_feed": "feeds"} + +* `CREATE_MISSING_QUEUES` was not honored by apply_async. + +* New remote control command: `stats` + + Dumps information about the worker, like pool process ids, and + total number of tasks executed by type. + + Example reply:: + + [{'worker.local': + 'total': {'tasks.sleeptask': 6}, + 'pool': {'timeouts': [None, None], + 'processes': [60376, 60377], + 'max-concurrency': 2, + 'max-tasks-per-child': None, + 'put-guarded-by-semaphore': True}}] + +* New remote control command: `dump_active` + + Gives a list of tasks currently being executed by the worker. + By default arguments are passed through repr in case there + are arguments that is not JSON encodable. 
If you know + the arguments are JSON safe, you can pass the argument `safe=True`. + + Example reply:: + + >>> broadcast("dump_active", arguments={"safe": False}, reply=True) + [{'worker.local': [ + {'args': '(1,)', + 'time_start': 1278580542.6300001, + 'name': 'tasks.sleeptask', + 'delivery_info': { + 'consumer_tag': '30', + 'routing_key': 'celery', + 'exchange': 'celery'}, + 'hostname': 'casper.local', + 'acknowledged': True, + 'kwargs': '{}', + 'id': '802e93e9-e470-47ed-b913-06de8510aca2', + } + ]}] + +* Added experimental support for persistent revokes. + + Use the `-S|--statedb` argument to the worker to enable it: + + .. code-block:: bash + + $ celeryd --statedb=/var/run/celeryd + + This will use the file: `/var/run/celeryd.db`, + as the `shelve` module automatically adds the `.db` suffix. + +.. _version-2.0.0: + +2.0.0 +===== +:release-date: 2010-07-02 02:30 P.M CEST +:release-by: Ask Solem + +Foreword +-------- + +Celery 2.0 contains backward incompatible changes, the most important +being that the Django dependency has been removed so Celery no longer +supports Django out of the box, but instead as an add-on package +called `django-celery`_. + +We're very sorry for breaking backwards compatibility, but there's +also many new and exciting features to make up for the time you lose +upgrading, so be sure to read the :ref:`News ` section. + +Quite a lot of potential users have been upset about the Django dependency, +so maybe this is a chance to get wider adoption by the Python community as +well. + +Big thanks to all contributors, testers and users! + +.. _v200-django-upgrade: + +Upgrading for Django-users +-------------------------- + +Django integration has been moved to a separate package: `django-celery`_. + +* To upgrade you need to install the `django-celery`_ module and change:: + + INSTALLED_APPS = "celery" + + to:: + + INSTALLED_APPS = "djcelery" + +* If you use `mod_wsgi` you need to add the following line to your `.wsgi` + file:: + + import os + os.environ["CELERY_LOADER"] = "django" + +* The following modules has been moved to `django-celery`_: + + ===================================== ===================================== + **Module name** **Replace with** + ===================================== ===================================== + `celery.models` `djcelery.models` + `celery.managers` `djcelery.managers` + `celery.views` `djcelery.views` + `celery.urls` `djcelery.urls` + `celery.management` `djcelery.management` + `celery.loaders.djangoapp` `djcelery.loaders` + `celery.backends.database` `djcelery.backends.database` + `celery.backends.cache` `djcelery.backends.cache` + ===================================== ===================================== + +Importing :mod:`djcelery` will automatically setup Celery to use Django loader. +loader. It does this by setting the :envvar:`CELERY_LOADER` environment variable to +`"django"` (it won't change it if a loader is already set.) + +When the Django loader is used, the "database" and "cache" result backend +aliases will point to the :mod:`djcelery` backends instead of the built-in backends, +and configuration will be read from the Django settings. + +.. _`django-celery`: http://pypi.python.org/pypi/django-celery + +.. _v200-upgrade: + +Upgrading for others +-------------------- + +.. _v200-upgrade-database: + +Database result backend +~~~~~~~~~~~~~~~~~~~~~~~ + +The database result backend is now using `SQLAlchemy`_ instead of the +Django ORM, see `Supported Databases`_ for a table of supported databases. 
+ +The `DATABASE_*` settings has been replaced by a single setting: +:setting:`CELERY_RESULT_DBURI`. The value here should be an +`SQLAlchemy Connection String`_, some examples include: + +.. code-block:: python + + # sqlite (filename) + CELERY_RESULT_DBURI = "sqlite:///celerydb.sqlite" + + # mysql + CELERY_RESULT_DBURI = "mysql://scott:tiger@localhost/foo" + + # postgresql + CELERY_RESULT_DBURI = "postgresql://scott:tiger@localhost/mydatabase" + + # oracle + CELERY_RESULT_DBURI = "oracle://scott:tiger@127.0.0.1:1521/sidname" + +See `SQLAlchemy Connection Strings`_ for more information about connection +strings. + +To specify additional SQLAlchemy database engine options you can use +the :setting:`CELERY_RESULT_ENGINE_OPTIONS` setting:: + + # echo enables verbose logging from SQLAlchemy. + CELERY_RESULT_ENGINE_OPTIONS = {"echo": True} + +.. _`SQLAlchemy`: + http://www.sqlalchemy.org +.. _`Supported Databases`: + http://www.sqlalchemy.org/docs/core/engines.html#supported-databases +.. _`SQLAlchemy Connection String`: + http://www.sqlalchemy.org/docs/core/engines.html#database-urls +.. _`SQLAlchemy Connection Strings`: + http://www.sqlalchemy.org/docs/core/engines.html#database-urls + +.. _v200-upgrade-cache: + +Cache result backend +~~~~~~~~~~~~~~~~~~~~ + +The cache result backend is no longer using the Django cache framework, +but it supports mostly the same configuration syntax:: + + CELERY_CACHE_BACKEND = "memcached://A.example.com:11211;B.example.com" + +To use the cache backend you must either have the `pylibmc`_ or +`python-memcached`_ library installed, of which the former is regarded +as the best choice. + +.. _`pylibmc`: http://pypi.python.org/pypi/pylibmc +.. _`python-memcached`: http://pypi.python.org/pypi/python-memcached + +The support backend types are `memcached://` and `memory://`, +we haven't felt the need to support any of the other backends +provided by Django. + +.. _v200-incompatible: + +Backward incompatible changes +----------------------------- + +* Default (python) loader now prints warning on missing `celeryconfig.py` + instead of raising :exc:`ImportError`. + + The worker raises :exc:`~@ImproperlyConfigured` if the configuration + is not set up. This makes it possible to use `--help` etc., without having a + working configuration. + + Also this makes it possible to use the client side of celery without being + configured:: + + >>> from carrot.connection import BrokerConnection + >>> conn = BrokerConnection("localhost", "guest", "guest", "/") + >>> from celery.execute import send_task + >>> r = send_task("celery.ping", args=(), kwargs={}, connection=conn) + >>> from celery.backends.amqp import AMQPBackend + >>> r.backend = AMQPBackend(connection=conn) + >>> r.get() + 'pong' + +* The following deprecated settings has been removed (as scheduled by + the :ref:`deprecation-timeline`): + + ===================================== ===================================== + **Setting name** **Replace with** + ===================================== ===================================== + `CELERY_AMQP_CONSUMER_QUEUES` `CELERY_QUEUES` + `CELERY_AMQP_EXCHANGE` `CELERY_DEFAULT_EXCHANGE` + `CELERY_AMQP_EXCHANGE_TYPE` `CELERY_DEFAULT_EXCHANGE_TYPE` + `CELERY_AMQP_CONSUMER_ROUTING_KEY` `CELERY_QUEUES` + `CELERY_AMQP_PUBLISHER_ROUTING_KEY` `CELERY_DEFAULT_ROUTING_KEY` + ===================================== ===================================== + +* The `celery.task.rest` module has been removed, use :mod:`celery.task.http` + instead (as scheduled by the :ref:`deprecation-timeline`). 
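+
+  For code that imported from the removed module only the import path
+  needs to change; a minimal sketch (`HttpDispatchTask` is used as an
+  example name here and may differ from what your code imported):
+
+  .. code-block:: python
+
+      # Before (module removed):
+      #   from celery.task.rest import HttpDispatchTask
+
+      # After:
+      from celery.task.http import HttpDispatchTask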
+ +* It's no longer allowed to skip the class name in loader names. + (as scheduled by the :ref:`deprecation-timeline`): + + Assuming the implicit `Loader` class name is no longer supported, + if you use e.g.:: + + CELERY_LOADER = "myapp.loaders" + + You need to include the loader class name, like this:: + + CELERY_LOADER = "myapp.loaders.Loader" + +* :setting:`CELERY_TASK_RESULT_EXPIRES` now defaults to 1 day. + + Previous default setting was to expire in 5 days. + +* AMQP backend: Don't use different values for `auto_delete`. + + This bug became visible with RabbitMQ 1.8.0, which no longer + allows conflicting declarations for the auto_delete and durable settings. + + If you've already used celery with this backend chances are you + have to delete the previous declaration: + + .. code-block:: bash + + $ camqadm exchange.delete celeryresults + +* Now uses pickle instead of cPickle on Python versions <= 2.5 + + cPickle is broken in Python <= 2.5. + + It unsafely and incorrectly uses relative instead of absolute imports, + so e.g.:: + + exceptions.KeyError + + becomes:: + + celery.exceptions.KeyError + + Your best choice is to upgrade to Python 2.6, + as while the pure pickle version has worse performance, + it is the only safe option for older Python versions. + +.. _v200-news: + +News +---- + +* **celeryev**: Curses Celery Monitor and Event Viewer. + + This is a simple monitor allowing you to see what tasks are + executing in real-time and investigate tracebacks and results of ready + tasks. It also enables you to set new rate limits and revoke tasks. + + Screenshot: + + .. figure:: ../images/celeryevshotsm.jpg + + If you run `celeryev` with the `-d` switch it will act as an event + dumper, simply dumping the events it receives to standard out: + + .. code-block:: bash + + $ celeryev -d + -> celeryev: starting capture... + casper.local [2010-06-04 10:42:07.020000] heartbeat + casper.local [2010-06-04 10:42:14.750000] task received: + tasks.add(61a68756-27f4-4879-b816-3cf815672b0e) args=[2, 2] kwargs={} + eta=2010-06-04T10:42:16.669290, retries=0 + casper.local [2010-06-04 10:42:17.230000] task started + tasks.add(61a68756-27f4-4879-b816-3cf815672b0e) args=[2, 2] kwargs={} + casper.local [2010-06-04 10:42:17.960000] task succeeded: + tasks.add(61a68756-27f4-4879-b816-3cf815672b0e) + args=[2, 2] kwargs={} result=4, runtime=0.782663106918 + + The fields here are, in order: *sender hostname*, *timestamp*, *event type* and + *additional event fields*. + +* AMQP result backend: Now supports `.ready()`, `.successful()`, + `.result`, `.status`, and even responds to changes in task state + +* New user guides: + + * :ref:`guide-workers` + * :ref:`guide-canvas` + * :ref:`guide-routing` + +* Worker: Standard out/error is now being redirected to the log file. + +* :mod:`billiard` has been moved back to the celery repository. + + ===================================== ===================================== + **Module name** **celery equivalent** + ===================================== ===================================== + `billiard.pool` `celery.concurrency.processes.pool` + `billiard.serialization` `celery.serialization` + `billiard.utils.functional` `celery.utils.functional` + ===================================== ===================================== + + The :mod:`billiard` distribution may be maintained, depending on interest. + +* now depends on :mod:`carrot` >= 0.10.5 + +* now depends on :mod:`pyparsing` + +* Worker: Added `--purge` as an alias to `--discard`. 
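+
+The AMQP result backend entry above can be exercised like this; a minimal
+sketch where `tasks.add` is a hypothetical task and the values shown
+assume the task has already finished:
+
+.. code-block:: python
+
+    >>> from tasks import add
+    >>> result = add.apply_async(args=(2, 2))
+    >>> result.ready()
+    True
+    >>> result.successful()
+    True
+    >>> result.status
+    'SUCCESS'
+    >>> result.result
+    4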
+ +* Worker: Ctrl+C (SIGINT) once does warm shutdown, hitting Ctrl+C twice + forces termination. + +* Added support for using complex crontab-expressions in periodic tasks. For + example, you can now use:: + + >>> crontab(minute="*/15") + + or even:: + + >>> crontab(minute="*/30", hour="8-17,1-2", day_of_week="thu-fri") + + See :ref:`guide-beat`. + +* Worker: Now waits for available pool processes before applying new + tasks to the pool. + + This means it doesn't have to wait for dozens of tasks to finish at shutdown + because it has applied prefetched tasks without having any pool + processes available to immediately accept them. + + See issue #122. + +* New built-in way to do task callbacks using + :class:`~celery.subtask`. + + See :ref:`guide-canvas` for more information. + +* TaskSets can now contain several types of tasks. + + :class:`~celery.task.sets.TaskSet` has been refactored to use + a new syntax, please see :ref:`guide-canvas` for more information. + + The previous syntax is still supported, but will be deprecated in + version 1.4. + +* TaskSet failed() result was incorrect. + + See issue #132. + +* Now creates different loggers per task class. + + See issue #129. + +* Missing queue definitions are now created automatically. + + You can disable this using the :setting:`CELERY_CREATE_MISSING_QUEUES` + setting. + + The missing queues are created with the following options:: + + CELERY_QUEUES[name] = {"exchange": name, + "exchange_type": "direct", + "routing_key": "name} + + This feature is added for easily setting up routing using the `-Q` + option to the worker: + + .. code-block:: bash + + $ celeryd -Q video, image + + See the new routing section of the User Guide for more information: + :ref:`guide-routing`. + +* New Task option: `Task.queue` + + If set, message options will be taken from the corresponding entry + in :setting:`CELERY_QUEUES`. `exchange`, `exchange_type` and `routing_key` + will be ignored + +* Added support for task soft and hard time limits. + + New settings added: + + * :setting:`CELERYD_TASK_TIME_LIMIT` + + Hard time limit. The worker processing the task will be killed and + replaced with a new one when this is exceeded. + + * :setting:`CELERYD_TASK_SOFT_TIME_LIMIT` + + Soft time limit. The :exc:`~@SoftTimeLimitExceeded` + exception will be raised when this is exceeded. The task can catch + this to e.g. clean up before the hard time limit comes. + + New command-line arguments to celeryd added: + `--time-limit` and `--soft-time-limit`. + + What's left? + + This won't work on platforms not supporting signals (and specifically + the `SIGUSR1` signal) yet. So an alternative the ability to disable + the feature all together on nonconforming platforms must be implemented. + + Also when the hard time limit is exceeded, the task result should + be a `TimeLimitExceeded` exception. + +* Test suite is now passing without a running broker, using the carrot + in-memory backend. + +* Log output is now available in colors. + + ===================================== ===================================== + **Log level** **Color** + ===================================== ===================================== + `DEBUG` Blue + `WARNING` Yellow + `CRITICAL` Magenta + `ERROR` Red + ===================================== ===================================== + + This is only enabled when the log output is a tty. + You can explicitly enable/disable this feature using the + :setting:`CELERYD_LOG_COLOR` setting. 
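+
+The soft time limit described in the time-limit entry above can be caught
+inside the task to clean up before the hard limit is enforced.  A hedged
+sketch (the task and its body are hypothetical):
+
+.. code-block:: python
+
+    import time
+
+    from celery.exceptions import SoftTimeLimitExceeded
+    from celery.task.base import Task
+
+    class ProcessVideo(Task):
+
+        def run(self, video_id, **kwargs):
+            try:
+                time.sleep(3600)     # stand-in for the real, slow work
+            except SoftTimeLimitExceeded:
+                print("soft limit hit, cleaning up %r" % (video_id, ))
+                raise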
+ +* Added support for task router classes (like the django multi-db routers) + + * New setting: :setting:`CELERY_ROUTES` + + This is a single, or a list of routers to traverse when + sending tasks. Dictionaries in this list converts to a + :class:`celery.routes.MapRoute` instance. + + Examples: + + >>> CELERY_ROUTES = {"celery.ping": "default", + "mytasks.add": "cpu-bound", + "video.encode": { + "queue": "video", + "exchange": "media" + "routing_key": "media.video.encode"}} + + >>> CELERY_ROUTES = ("myapp.tasks.Router", + {"celery.ping": "default}) + + Where `myapp.tasks.Router` could be: + + .. code-block:: python + + class Router(object): + + def route_for_task(self, task, args=None, kwargs=None): + if task == "celery.ping": + return "default" + + route_for_task may return a string or a dict. A string then means + it's a queue name in :setting:`CELERY_QUEUES`, a dict means it's a custom route. + + When sending tasks, the routers are consulted in order. The first + router that doesn't return `None` is the route to use. The message options + is then merged with the found route settings, where the routers settings + have priority. + + Example if :func:`~celery.execute.apply_async` has these arguments:: + + >>> Task.apply_async(immediate=False, exchange="video", + ... routing_key="video.compress") + + and a router returns:: + + {"immediate": True, + "exchange": "urgent"} + + the final message options will be:: + + immediate=True, exchange="urgent", routing_key="video.compress" + + (and any default message options defined in the + :class:`~celery.task.base.Task` class) + +* New Task handler called after the task returns: + :meth:`~celery.task.base.Task.after_return`. + +* :class:`~celery.datastructures.ExceptionInfo` now passed to + :meth:`~celery.task.base.Task.on_retry`/ + :meth:`~celery.task.base.Task.on_failure` as einfo keyword argument. + +* Worker: Added :setting:`CELERYD_MAX_TASKS_PER_CHILD` / + :option:`--maxtasksperchild` + + Defines the maximum number of tasks a pool worker can process before + the process is terminated and replaced by a new one. + +* Revoked tasks now marked with state :state:`REVOKED`, and `result.get()` + will now raise :exc:`~@TaskRevokedError`. + +* :func:`celery.task.control.ping` now works as expected. + +* `apply(throw=True)` / :setting:`CELERY_EAGER_PROPAGATES_EXCEPTIONS`: + Makes eager execution re-raise task errors. + +* New signal: :signal:`~celery.signals.worker_process_init`: Sent inside the + pool worker process at init. + +* Worker: :option:`-Q` option: Ability to specify list of queues to use, + disabling other configured queues. + + For example, if :setting:`CELERY_QUEUES` defines four + queues: `image`, `video`, `data` and `default`, the following + command would make the worker only consume from the `image` and `video` + queues: + + .. code-block:: bash + + $ celeryd -Q image,video + +* Worker: New return value for the `revoke` control command: + + Now returns:: + + {"ok": "task $id revoked"} + + instead of `True`. + +* Worker: Can now enable/disable events using remote control + + Example usage: + + >>> from celery.task.control import broadcast + >>> broadcast("enable_events") + >>> broadcast("disable_events") + +* Removed top-level tests directory. Test config now in celery.tests.config + + This means running the unit tests doesn't require any special setup. 
+ `celery/tests/__init__` now configures the :envvar:`CELERY_CONFIG_MODULE` + and :envvar:`CELERY_LOADER` environment variables, so when `nosetests` + imports that, the unit test environment is all set up. + + Before you run the tests you need to install the test requirements: + + .. code-block:: bash + + $ pip install -r requirements/test.txt + + Running all tests: + + .. code-block:: bash + + $ nosetests + + Specifying the tests to run: + + .. code-block:: bash + + $ nosetests celery.tests.test_task + + Producing HTML coverage: + + .. code-block:: bash + + $ nosetests --with-coverage3 + + The coverage output is then located in `celery/tests/cover/index.html`. + +* Worker: New option `--version`: Dump version info and exit. + +* :mod:`celeryd-multi `: Tool for shell scripts + to start multiple workers. + + Some examples: + + .. code-block:: bash + + # Advanced example with 10 workers: + # * Three of the workers processes the images and video queue + # * Two of the workers processes the data queue with loglevel DEBUG + # * the rest processes the default' queue. + $ celeryd-multi start 10 -l INFO -Q:1-3 images,video -Q:4,5:data + -Q default -L:4,5 DEBUG + + # get commands to start 10 workers, with 3 processes each + $ celeryd-multi start 3 -c 3 + celeryd -n celeryd1.myhost -c 3 + celeryd -n celeryd2.myhost -c 3 + celeryd -n celeryd3.myhost -c 3 + + # start 3 named workers + $ celeryd-multi start image video data -c 3 + celeryd -n image.myhost -c 3 + celeryd -n video.myhost -c 3 + celeryd -n data.myhost -c 3 + + # specify custom hostname + $ celeryd-multi start 2 -n worker.example.com -c 3 + celeryd -n celeryd1.worker.example.com -c 3 + celeryd -n celeryd2.worker.example.com -c 3 + + # Additionl options are added to each celeryd', + # but you can also modify the options for ranges of or single workers + + # 3 workers: Two with 3 processes, and one with 10 processes. + $ celeryd-multi start 3 -c 3 -c:1 10 + celeryd -n celeryd1.myhost -c 10 + celeryd -n celeryd2.myhost -c 3 + celeryd -n celeryd3.myhost -c 3 + + # can also specify options for named workers + $ celeryd-multi start image video data -c 3 -c:image 10 + celeryd -n image.myhost -c 10 + celeryd -n video.myhost -c 3 + celeryd -n data.myhost -c 3 + + # ranges and lists of workers in options is also allowed: + # (-c:1-3 can also be written as -c:1,2,3) + $ celeryd-multi start 5 -c 3 -c:1-3 10 + celeryd-multi -n celeryd1.myhost -c 10 + celeryd-multi -n celeryd2.myhost -c 10 + celeryd-multi -n celeryd3.myhost -c 10 + celeryd-multi -n celeryd4.myhost -c 3 + celeryd-multi -n celeryd5.myhost -c 3 + + # lists also works with named workers + $ celeryd-multi start foo bar baz xuzzy -c 3 -c:foo,bar,baz 10 + celeryd-multi -n foo.myhost -c 10 + celeryd-multi -n bar.myhost -c 10 + celeryd-multi -n baz.myhost -c 10 + celeryd-multi -n xuzzy.myhost -c 3 + +* The worker now calls the result backends `process_cleanup` method + *after* task execution instead of before. + +* AMQP result backend now supports Pika. diff --git a/docs/history/changelog-2.1.rst b/docs/history/changelog-2.1.rst new file mode 100644 index 0000000..57b898f --- /dev/null +++ b/docs/history/changelog-2.1.rst @@ -0,0 +1,762 @@ +.. _changelog-2.1: + +=============================== + Change history for Celery 2.1 +=============================== + +.. contents:: + :local: + +.. _version-2.1.4: + +2.1.4 +===== +:release-date: 2010-12-03 12:00 P.M CEST +:release-by: Ask Solem + +.. 
_v214-fixes: + +Fixes +----- + +* Execution options to `apply_async` now takes precedence over options + returned by active routers. This was a regression introduced recently + (Issue #244). + +* curses monitor: Long arguments are now truncated so curses + doesn't crash with out of bounds errors. (Issue #235). + +* multi: Channel errors occurring while handling control commands no + longer crash the worker but are instead logged with severity error. + +* SQLAlchemy database backend: Fixed a race condition occurring when + the client wrote the pending state. Just like the Django database backend, + it does no longer save the pending state (Issue #261 + Issue #262). + +* Error email body now uses `repr(exception)` instead of `str(exception)`, + as the latter could result in Unicode decode errors (Issue #245). + +* Error email timeout value is now configurable by using the + :setting:`EMAIL_TIMEOUT` setting. + +* `celeryev`: Now works on Windows (but the curses monitor won't work without + having curses). + +* Unit test output no longer emits non-standard characters. + +* worker: The broadcast consumer is now closed if the connection is reset. + +* worker: Now properly handles errors occurring while trying to acknowledge + the message. + +* `TaskRequest.on_failure` now encodes traceback using the current filesystem + encoding. (Issue #286). + +* `EagerResult` can now be pickled (Issue #288). + +.. _v214-documentation: + +Documentation +------------- + +* Adding :ref:`contributing`. + +* Added :ref:`guide-optimizing`. + +* Added :ref:`faq-security` section to the FAQ. + +.. _version-2.1.3: + +2.1.3 +===== +:release-date: 2010-11-09 05:00 P.M CEST +:release-by: Ask Solem + +.. _v213-fixes: + +* Fixed deadlocks in `timer2` which could lead to `djcelerymon`/`celeryev -c` + hanging. + +* `EventReceiver`: now sends heartbeat request to find workers. + + This means :program:`celeryev` and friends finds workers immediately + at startup. + +* celeryev cursesmon: Set screen_delay to 10ms, so the screen refreshes more + often. + +* Fixed pickling errors when pickling :class:`AsyncResult` on older Python + versions. + +* worker: prefetch count was decremented by eta tasks even if there + were no active prefetch limits. + + +.. _version-2.1.2: + +2.1.2 +===== +:release-data: TBA + +.. _v212-fixes: + +Fixes +----- + +* worker: Now sends the :event:`task-retried` event for retried tasks. + +* worker: Now honors ignore result for + :exc:`~@WorkerLostError` and timeout errors. + +* celerybeat: Fixed :exc:`UnboundLocalError` in celerybeat logging + when using logging setup signals. + +* worker: All log messages now includes `exc_info`. + +.. _version-2.1.1: + +2.1.1 +===== +:release-date: 2010-10-14 02:00 P.M CEST +:release-by: Ask Solem + +.. _v211-fixes: + +Fixes +----- + +* Now working on Windows again. + + Removed dependency on the pwd/grp modules. + +* snapshots: Fixed race condition leading to loss of events. + +* worker: Reject tasks with an eta that cannot be converted to a time stamp. + + See issue #209 + +* concurrency.processes.pool: The semaphore was released twice for each task + (both at ACK and result ready). + + This has been fixed, and it is now released only once per task. + +* docs/configuration: Fixed typo `CELERYD_TASK_SOFT_TIME_LIMIT` -> + :setting:`CELERYD_TASK_SOFT_TIME_LIMIT`. + + See issue #214 + +* control command `dump_scheduled`: was using old .info attribute + +* multi: Fixed `set changed size during iteration` bug + occurring in the restart command. 
+ +* worker: Accidentally tried to use additional command-line arguments. + + This would lead to an error like: + + `got multiple values for keyword argument 'concurrency'`. + + Additional command-line arguments are now ignored, and does not + produce this error. However -- we do reserve the right to use + positional arguments in the future, so please do not depend on this + behavior. + +* celerybeat: Now respects routers and task execution options again. + +* celerybeat: Now reuses the publisher instead of the connection. + +* Cache result backend: Using :class:`float` as the expires argument + to `cache.set` is deprecated by the memcached libraries, + so we now automatically cast to :class:`int`. + +* unit tests: No longer emits logging and warnings in test output. + +.. _v211-news: + +News +---- + +* Now depends on carrot version 0.10.7. + +* Added :setting:`CELERY_REDIRECT_STDOUTS`, and + :setting:`CELERYD_REDIRECT_STDOUTS_LEVEL` settings. + + :setting:`CELERY_REDIRECT_STDOUTS` is used by the worker and + beat. All output to `stdout` and `stderr` will be + redirected to the current logger if enabled. + + :setting:`CELERY_REDIRECT_STDOUTS_LEVEL` decides the log level used and is + :const:`WARNING` by default. + +* Added :setting:`CELERYBEAT_SCHEDULER` setting. + + This setting is used to define the default for the -S option to + :program:`celerybeat`. + + Example: + + .. code-block:: python + + CELERYBEAT_SCHEDULER = "djcelery.schedulers.DatabaseScheduler" + +* Added Task.expires: Used to set default expiry time for tasks. + +* New remote control commands: `add_consumer` and `cancel_consumer`. + + .. method:: add_consumer(queue, exchange, exchange_type, routing_key, + **options) + :module: + + Tells the worker to declare and consume from the specified + declaration. + + .. method:: cancel_consumer(queue_name) + :module: + + Tells the worker to stop consuming from queue (by queue name). + + + Commands also added to :program:`celeryctl` and + :class:`~celery.task.control.inspect`. + + + Example using celeryctl to start consuming from queue "queue", in + exchange "exchange", of type "direct" using binding key "key": + + .. code-block:: bash + + $ celeryctl inspect add_consumer queue exchange direct key + $ celeryctl inspect cancel_consumer queue + + See :ref:`monitoring-control` for more information about the + :program:`celeryctl` program. + + + Another example using :class:`~celery.task.control.inspect`: + + .. code-block:: python + + >>> from celery.task.control import inspect + >>> inspect.add_consumer(queue="queue", exchange="exchange", + ... exchange_type="direct", + ... routing_key="key", + ... durable=False, + ... auto_delete=True) + + >>> inspect.cancel_consumer("queue") + +* celerybeat: Now logs the traceback if a message can't be sent. + +* celerybeat: Now enables a default socket timeout of 30 seconds. + +* README/introduction/homepage: Added link to `Flask-Celery`_. + +.. _`Flask-Celery`: http://github.com/ask/flask-celery + +.. _version-2.1.0: + +2.1.0 +===== +:release-date: 2010-10-08 12:00 P.M CEST +:release-by: Ask Solem + +.. _v210-important: + +Important Notes +--------------- + +* Celery is now following the versioning semantics defined by `semver`_. + + This means we are no longer allowed to use odd/even versioning semantics + By our previous versioning scheme this stable release should have + been version 2.2. + +.. _`semver`: http://semver.org + +* Now depends on Carrot 0.10.7. 
+ +* No longer depends on SQLAlchemy, this needs to be installed separately + if the database result backend is used. + +* django-celery now comes with a monitor for the Django Admin interface. + This can also be used if you're not a Django user. + (Update: Django-Admin monitor has been replaced with Flower, see the + Monitoring guide). + +* If you get an error after upgrading saying: + `AttributeError: 'module' object has no attribute 'system'`, + + Then this is because the `celery.platform` module has been + renamed to `celery.platforms` to not collide with the built-in + :mod:`platform` module. + + You have to remove the old :file:`platform.py` (and maybe + :file:`platform.pyc`) file from your previous Celery installation. + + To do this use :program:`python` to find the location + of this module: + + .. code-block:: bash + + $ python + >>> import celery.platform + >>> celery.platform + + + Here the compiled module is in :file:`/opt/devel/celery/celery/`, + to remove the offending files do: + + .. code-block:: bash + + $ rm -f /opt/devel/celery/celery/platform.py* + +.. _v210-news: + +News +---- + +* Added support for expiration of AMQP results (requires RabbitMQ 2.1.0) + + The new configuration option :setting:`CELERY_AMQP_TASK_RESULT_EXPIRES` + sets the expiry time in seconds (can be int or float): + + .. code-block:: python + + CELERY_AMQP_TASK_RESULT_EXPIRES = 30 * 60 # 30 minutes. + CELERY_AMQP_TASK_RESULT_EXPIRES = 0.80 # 800 ms. + +* celeryev: Event Snapshots + + If enabled, the worker sends messages about what the worker is doing. + These messages are called "events". + The events are used by real-time monitors to show what the + cluster is doing, but they are not very useful for monitoring + over a longer period of time. Snapshots + lets you take "pictures" of the clusters state at regular intervals. + This can then be stored in a database to generate statistics + with, or even monitoring over longer time periods. + + django-celery now comes with a Celery monitor for the Django + Admin interface. To use this you need to run the django-celery + snapshot camera, which stores snapshots to the database at configurable + intervals. + + To use the Django admin monitor you need to do the following: + + 1. Create the new database tables: + + .. code-block:: bash + + $ python manage.py syncdb + + 2. Start the django-celery snapshot camera: + + .. code-block:: bash + + $ python manage.py celerycam + + 3. Open up the django admin to monitor your cluster. + + The admin interface shows tasks, worker nodes, and even + lets you perform some actions, like revoking and rate limiting tasks, + and shutting down worker nodes. + + There's also a Debian init.d script for :mod:`~celery.bin.events` available, + see :ref:`daemonizing` for more information. + + New command-line arguments to celeryev: + + * :option:`-c|--camera`: Snapshot camera class to use. + * :option:`--logfile|-f`: Log file + * :option:`--loglevel|-l`: Log level + * :option:`--maxrate|-r`: Shutter rate limit. + * :option:`--freq|-F`: Shutter frequency + + The :option:`--camera` argument is the name of a class used to take + snapshots with. It must support the interface defined by + :class:`celery.events.snapshot.Polaroid`. + + Shutter frequency controls how often the camera thread wakes up, + while the rate limit controls how often it will actually take + a snapshot. 
+ The rate limit can be an integer (snapshots/s), or a rate limit string + which has the same syntax as the task rate limit strings (`"200/m"`, + `"10/s"`, `"1/h",` etc). + + For the Django camera case, this rate limit can be used to control + how often the snapshots are written to the database, and the frequency + used to control how often the thread wakes up to check if there's + anything new. + + The rate limit is off by default, which means it will take a snapshot + for every :option:`--frequency` seconds. + +* :func:`~celery.task.control.broadcast`: Added callback argument, this can be + used to process replies immediately as they arrive. + +* celeryctl: New command line utility to manage and inspect worker nodes, + apply tasks and inspect the results of tasks. + + .. seealso:: + + The :ref:`monitoring-control` section in the :ref:`guide`. + + Some examples: + + .. code-block:: bash + + $ celeryctl apply tasks.add -a '[2, 2]' --countdown=10 + + $ celeryctl inspect active + $ celeryctl inspect registered_tasks + $ celeryctl inspect scheduled + $ celeryctl inspect --help + $ celeryctl apply --help + +* Added the ability to set an expiry date and time for tasks. + + Example:: + + >>> # Task expires after one minute from now. + >>> task.apply_async(args, kwargs, expires=60) + >>> # Also supports datetime + >>> task.apply_async(args, kwargs, + ... expires=datetime.now() + timedelta(days=1) + + When a worker receives a task that has been expired it will be + marked as revoked (:exc:`~@TaskRevokedError`). + +* Changed the way logging is configured. + + We now configure the root logger instead of only configuring + our custom logger. In addition we don't hijack + the multiprocessing logger anymore, but instead use a custom logger name + for different applications: + + ===================================== ===================================== + **Application** **Logger Name** + ===================================== ===================================== + `celeryd` "celery" + `celerybeat` "celery.beat" + `celeryev` "celery.ev" + ===================================== ===================================== + + This means that the `loglevel` and `logfile` arguments will + affect all registered loggers (even those from 3rd party libraries). + Unless you configure the loggers manually as shown below, that is. + + *Users can choose to configure logging by subscribing to the + :signal:`~celery.signals.setup_logging` signal:* + + .. code-block:: python + + from logging.config import fileConfig + from celery import signals + + @signals.setup_logging.connect + def setup_logging(**kwargs): + fileConfig("logging.conf") + + If there are no receivers for this signal, the logging subsystem + will be configured using the :option:`--loglevel`/:option:`--logfile` + argument, this will be used for *all defined loggers*. + + Remember that the worker also redirects stdout and stderr + to the celery logger, if manually configure logging + you also need to redirect the stdouts manually: + + .. code-block:: python + + from logging.config import fileConfig + from celery import log + + def setup_logging(**kwargs): + import logging + fileConfig("logging.conf") + stdouts = logging.getLogger("mystdoutslogger") + log.redirect_stdouts_to_logger(stdouts, loglevel=logging.WARNING) + +* worker Added command line option :option:`-I`/:option:`--include`: + + A comma separated list of (task) modules to be imported. + + Example: + + .. 
code-block:: bash + + $ celeryd -I app1.tasks,app2.tasks + +* worker: now emits a warning if running as the root user (euid is 0). + +* :func:`celery.messaging.establish_connection`: Ability to override defaults + used using keyword argument "defaults". + +* worker: Now uses `multiprocessing.freeze_support()` so that it should work + with **py2exe**, **PyInstaller**, **cx_Freeze**, etc. + +* worker: Now includes more metadata for the :state:`STARTED` state: PID and + host name of the worker that started the task. + + See issue #181 + +* subtask: Merge additional keyword arguments to `subtask()` into task keyword + arguments. + + e.g.: + + >>> s = subtask((1, 2), {"foo": "bar"}, baz=1) + >>> s.args + (1, 2) + >>> s.kwargs + {"foo": "bar", "baz": 1} + + See issue #182. + +* worker: Now emits a warning if there is already a worker node using the same + name running on the same virtual host. + +* AMQP result backend: Sending of results are now retried if the connection + is down. + +* AMQP result backend: `result.get()`: Wait for next state if state is not + in :data:`~celery.states.READY_STATES`. + +* TaskSetResult now supports subscription. + + :: + + >>> res = TaskSet(tasks).apply_async() + >>> res[0].get() + +* Added `Task.send_error_emails` + `Task.error_whitelist`, so these can + be configured per task instead of just by the global setting. + +* Added `Task.store_errors_even_if_ignored`, so it can be changed per Task, + not just by the global setting. + +* The crontab scheduler no longer wakes up every second, but implements + `remaining_estimate` (*Optimization*). + +* worker: Store :state:`FAILURE` result if the + :exc:`~@WorkerLostError` exception occurs (worker process + disappeared). + +* worker: Store :state:`FAILURE` result if one of the `*TimeLimitExceeded` + exceptions occurs. + +* Refactored the periodic task responsible for cleaning up results. + + * The backend cleanup task is now only added to the schedule if + :setting:`CELERY_TASK_RESULT_EXPIRES` is set. + + * If the schedule already contains a periodic task named + "celery.backend_cleanup" it won't change it, so the behavior of the + backend cleanup task can be easily changed. + + * The task is now run every day at 4:00 AM, rather than every day since + the first time it was run (using crontab schedule instead of + `run_every`) + + * Renamed `celery.task.builtins.DeleteExpiredTaskMetaTask` + -> :class:`celery.task.builtins.backend_cleanup` + + * The task itself has been renamed from "celery.delete_expired_task_meta" + to "celery.backend_cleanup" + + See issue #134. + +* Implemented `AsyncResult.forget` for sqla/cache/redis/tyrant backends. + (Forget and remove task result). + + See issue #184. + +* :meth:`TaskSetResult.join `: + Added 'propagate=True' argument. + + When set to :const:`False` exceptions occurring in subtasks will + not be re-raised. + +* Added `Task.update_state(task_id, state, meta)` + as a shortcut to `task.backend.store_result(task_id, meta, state)`. + + The backend interface is "private" and the terminology outdated, + so better to move this to :class:`~celery.task.base.Task` so it can be + used. + +* timer2: Set `self.running=False` in + :meth:`~celery.utils.timer2.Timer.stop` so it won't try to join again on + subsequent calls to `stop()`. + +* Log colors are now disabled by default on Windows. + +* `celery.platform` renamed to :mod:`celery.platforms`, so it doesn't + collide with the built-in :mod:`platform` module. 
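+
+  Only the module path changes; a minimal illustration of the new import:
+
+  .. code-block:: python
+
+      # Old name (pre-2.1), removed because it shadowed the stdlib module:
+      #     from celery import platform
+      # New name:
+      from celery import platforms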
+ +* Exceptions occurring in Mediator+Pool callbacks are now caught and logged + instead of taking down the worker. + +* Redis result backend: Now supports result expiration using the Redis + `EXPIRE` command. + +* unit tests: Don't leave threads running at tear down. + +* worker: Task results shown in logs are now truncated to 46 chars. + +* `Task.__name__` is now an alias to `self.__class__.__name__`. + This way tasks introspects more like regular functions. + +* `Task.retry`: Now raises :exc:`TypeError` if kwargs argument is empty. + + See issue #164. + +* timedelta_seconds: Use `timedelta.total_seconds` if running on Python 2.7 + +* :class:`~celery.datastructures.TokenBucket`: Generic Token Bucket algorithm + +* :mod:`celery.events.state`: Recording of cluster state can now + be paused and resumed, including support for buffering. + + + .. method:: State.freeze(buffer=True) + + Pauses recording of the stream. + + If `buffer` is true, events received while being frozen will be + buffered, and may be replayed later. + + .. method:: State.thaw(replay=True) + + Resumes recording of the stream. + + If `replay` is true, then the recorded buffer will be applied. + + .. method:: State.freeze_while(fun) + + With a function to apply, freezes the stream before, + and replays the buffer after the function returns. + +* :meth:`EventReceiver.capture ` + Now supports a timeout keyword argument. + +* worker: The mediator thread is now disabled if + :setting:`CELERY_RATE_LIMITS` is enabled, and tasks are directly sent to the + pool without going through the ready queue (*Optimization*). + +.. _v210-fixes: + +Fixes +----- + +* Pool: Process timed out by `TimeoutHandler` must be joined by the Supervisor, + so don't remove it from the internal process list. + + See issue #192. + +* `TaskPublisher.delay_task` now supports exchange argument, so exchange can be + overridden when sending tasks in bulk using the same publisher + + See issue #187. + +* the worker no longer marks tasks as revoked if :setting:`CELERY_IGNORE_RESULT` + is enabled. + + See issue #207. + +* AMQP Result backend: Fixed bug with `result.get()` if + :setting:`CELERY_TRACK_STARTED` enabled. + + `result.get()` would stop consuming after receiving the + :state:`STARTED` state. + +* Fixed bug where new processes created by the pool supervisor becomes stuck + while reading from the task Queue. + + See http://bugs.python.org/issue10037 + +* Fixed timing issue when declaring the remote control command reply queue + + This issue could result in replies being lost, but have now been fixed. + +* Backward compatible `LoggerAdapter` implementation: Now works for Python 2.4. + + Also added support for several new methods: + `fatal`, `makeRecord`, `_log`, `log`, `isEnabledFor`, + `addHandler`, `removeHandler`. + +.. _v210-experimental: + +Experimental +------------ + +* multi: Added daemonization support. + + multi can now be used to start, stop and restart worker nodes: + + .. code-block:: bash + + $ celeryd-multi start jerry elaine george kramer + + This also creates PID files and log files (:file:`celeryd@jerry.pid`, + ..., :file:`celeryd@jerry.log`. To specify a location for these files + use the `--pidfile` and `--logfile` arguments with the `%n` + format: + + .. code-block:: bash + + $ celeryd-multi start jerry elaine george kramer \ + --logfile=/var/log/celeryd@%n.log \ + --pidfile=/var/run/celeryd@%n.pid + + Stopping: + + .. code-block:: bash + + $ celeryd-multi stop jerry elaine george kramer + + Restarting. 
The nodes will be restarted one by one as the old ones + are shutdown: + + .. code-block:: bash + + $ celeryd-multi restart jerry elaine george kramer + + Killing the nodes (**WARNING**: Will discard currently executing tasks): + + .. code-block:: bash + + $ celeryd-multi kill jerry elaine george kramer + + See `celeryd-multi help` for help. + +* multi: `start` command renamed to `show`. + + `celeryd-multi start` will now actually start and detach worker nodes. + To just generate the commands you have to use `celeryd-multi show`. + +* worker: Added `--pidfile` argument. + + The worker will write its pid when it starts. The worker will + not be started if this file exists and the pid contained is still alive. + +* Added generic init.d script using `celeryd-multi` + + http://github.com/celery/celery/tree/master/extra/generic-init.d/celeryd + +.. _v210-documentation: + +Documentation +------------- + +* Added User guide section: Monitoring + +* Added user guide section: Periodic Tasks + + Moved from `getting-started/periodic-tasks` and updated. + +* tutorials/external moved to new section: "community". + +* References has been added to all sections in the documentation. + + This makes it easier to link between documents. + + diff --git a/docs/history/changelog-2.2.rst b/docs/history/changelog-2.2.rst new file mode 100644 index 0000000..5db27d0 --- /dev/null +++ b/docs/history/changelog-2.2.rst @@ -0,0 +1,1026 @@ +.. _changelog-2.2: + +=============================== + Change history for Celery 2.2 +=============================== + +.. contents:: + :local: + +.. _version-2.2.8: + +2.2.8 +===== +:release-date: 2011-11-25 04:00 P.M GMT +:release-by: Ask Solem + +.. _v228-security-fixes: + +Security Fixes +-------------- + +* [Security: `CELERYSA-0001`_] Daemons would set effective id's rather than + real id's when the :option:`--uid`/:option:`--gid` arguments to + :program:`celery multi`, :program:`celeryd_detach`, + :program:`celery beat` and :program:`celery events` were used. + + This means privileges weren't properly dropped, and that it would + be possible to regain supervisor privileges later. + + +.. _`CELERYSA-0001`: + http://github.com/celery/celery/tree/master/docs/sec/CELERYSA-0001.txt + +.. _version-2.2.7: + +2.2.7 +===== +:release-date: 2011-06-13 04:00 P.M BST +:release-by: Ask Solem + +* New signals: :signal:`after_setup_logger` and + :signal:`after_setup_task_logger` + + These signals can be used to augment logging configuration + after Celery has set up logging. + +* Redis result backend now works with Redis 2.4.4. + +* multi: The :option:`--gid` option now works correctly. + +* worker: Retry wrongfully used the repr of the traceback instead + of the string representation. + +* App.config_from_object: Now loads module, not attribute of module. + +* Fixed issue where logging of objects would give "" + +.. _version-2.2.6: + +2.2.6 +===== +:release-date: 2011-04-15 04:00 P.M CEST +:release-by: Ask Solem + +.. _v226-important: + +Important Notes +--------------- + +* Now depends on Kombu 1.1.2. + +* Dependency lists now explicitly specifies that we don't want python-dateutil + 2.x, as this version only supports py3k. + + If you have installed dateutil 2.0 by accident you should downgrade + to the 1.5.0 version:: + + pip install -U python-dateutil==1.5.0 + + or by easy_install:: + + easy_install -U python-dateutil==1.5.0 + +.. _v226-fixes: + +Fixes +----- + +* The new ``WatchedFileHandler`` broke Python 2.5 support (Issue #367). 
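+
+  For context, ``WatchedFileHandler`` is the standard library handler from
+  :mod:`logging.handlers` (available from Python 2.6, hence the 2.5
+  breakage). A minimal sketch of what it provides, using a hypothetical
+  log file name:
+
+  .. code-block:: python
+
+      from logging import getLogger
+      from logging.handlers import WatchedFileHandler
+
+      # Re-opens the log file if it is renamed or deleted (e.g. by
+      # logrotate), so logging continues into the new file after rotation.
+      logger = getLogger("celery")
+      logger.addHandler(WatchedFileHandler("celeryd.log"))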
+ +* Task: Don't use ``app.main`` if the task name is set explicitly. + +* Sending emails did not work on Python 2.5, due to a bug in + the version detection code (Issue #378). + +* Beat: Adds method ``ScheduleEntry._default_now`` + + This method can be overridden to change the default value + of ``last_run_at``. + +* An error occurring in process cleanup could mask task errors. + + We no longer propagate errors happening at process cleanup, + but log them instead. This way they will not interfere with publishing + the task result (Issue #365). + +* Defining tasks did not work properly when using the Django + ``shell_plus`` utility (Issue #366). + +* ``AsyncResult.get`` did not accept the ``interval`` and ``propagate`` + arguments. + +* worker: Fixed a bug where the worker would not shutdown if a + :exc:`socket.error` was raised. + +.. _version-2.2.5: + +2.2.5 +===== +:release-date: 2011-03-28 06:00 P.M CEST +:release-by: Ask Solem + +.. _v225-important: + +Important Notes +--------------- + +* Now depends on Kombu 1.0.7 + +.. _v225-news: + +News +---- + +* Our documentation is now hosted by Read The Docs + (http://docs.celeryproject.org), and all links have been changed to point to + the new URL. + +* Logging: Now supports log rotation using external tools like `logrotate.d`_ + (Issue #321) + + This is accomplished by using the ``WatchedFileHandler``, which re-opens + the file if it is renamed or deleted. + +.. _`logrotate.d`: + http://www.ducea.com/2006/06/06/rotating-linux-log-files-part-2-logrotate/ + +* otherqueues tutorial now documents how to configure Redis/Database result + backends. + +* gevent: Now supports ETA tasks. + + But gevent still needs ``CELERY_DISABLE_RATE_LIMITS=True`` to work. + +* TaskSet User Guide: now contains TaskSet callback recipes. + +* Eventlet: New signals: + + * ``eventlet_pool_started`` + * ``eventlet_pool_preshutdown`` + * ``eventlet_pool_postshutdown`` + * ``eventlet_pool_apply`` + + See :mod:`celery.signals` for more information. + +* New :setting:`BROKER_TRANSPORT_OPTIONS` setting can be used to pass + additional arguments to a particular broker transport. + +* worker: ``worker_pid`` is now part of the request info as returned by + broadcast commands. + +* TaskSet.apply/Taskset.apply_async now accepts an optional ``taskset_id`` + argument. + +* The taskset_id (if any) is now available in the Task request context. + +* SQLAlchemy result backend: taskset_id and taskset_id columns now have a + unique constraint. (Tables need to recreated for this to take affect). + +* Task Userguide: Added section about choosing a result backend. + +* Removed unused attribute ``AsyncResult.uuid``. + +.. _v225-fixes: + +Fixes +----- + +* multiprocessing.Pool: Fixes race condition when marking job with + ``WorkerLostError`` (Issue #268). + + The process may have published a result before it was terminated, + but we have no reliable way to detect that this is the case. + + So we have to wait for 10 seconds before marking the result with + WorkerLostError. This gives the result handler a chance to retrieve the + result. + +* multiprocessing.Pool: Shutdown could hang if rate limits disabled. + + There was a race condition when the MainThread was waiting for the pool + semaphore to be released. The ResultHandler now terminates after 5 + seconds if there are unacked jobs, but no worker processes left to start + them (it needs to timeout because there could still be an ack+result + that we haven't consumed from the result queue. 
It + is unlikely we will receive any after 5 seconds with no worker processes). + +* celerybeat: Now creates pidfile even if the ``--detach`` option is not set. + +* eventlet/gevent: The broadcast command consumer is now running in a separate + greenthread. + + This ensures broadcast commands will take priority even if there are many + active tasks. + +* Internal module ``celery.worker.controllers`` renamed to + ``celery.worker.mediator``. + +* worker: Threads now terminates the program by calling ``os._exit``, as it + is the only way to ensure exit in the case of syntax errors, or other + unrecoverable errors. + +* Fixed typo in ``maybe_timedelta`` (Issue #352). + +* worker: Broadcast commands now logs with loglevel debug instead of warning. + +* AMQP Result Backend: Now resets cached channel if the connection is lost. + +* Polling results with the AMQP result backend was not working properly. + +* Rate limits: No longer sleeps if there are no tasks, but rather waits for + the task received condition (Performance improvement). + +* ConfigurationView: ``iter(dict)`` should return keys, not items (Issue #362). + +* celerybeat: PersistentScheduler now automatically removes a corrupted + schedule file (Issue #346). + +* Programs that doesn't support positional command-line arguments now provides + a user friendly error message. + +* Programs no longer tries to load the configuration file when showing + ``--version`` (Issue #347). + +* Autoscaler: The "all processes busy" log message is now severity debug + instead of error. + +* worker: If the message body can't be decoded, it is now passed through + ``safe_str`` when logging. + + This to ensure we don't get additional decoding errors when trying to log + the failure. + +* ``app.config_from_object``/``app.config_from_envvar`` now works for all + loaders. + +* Now emits a user-friendly error message if the result backend name is + unknown (Issue #349). + +* :mod:`celery.contrib.batches`: Now sets loglevel and logfile in the task + request so ``task.get_logger`` works with batch tasks (Issue #357). + +* worker: An exception was raised if using the amqp transport and the prefetch + count value exceeded 65535 (Issue #359). + + The prefetch count is incremented for every received task with an + ETA/countdown defined. The prefetch count is a short, so can only support + a maximum value of 65535. If the value exceeds the maximum value we now + disable the prefetch count, it is re-enabled as soon as the value is below + the limit again. + +* cursesmon: Fixed unbound local error (Issue #303). + +* eventlet/gevent is now imported on demand so autodoc can import the modules + without having eventlet/gevent installed. + +* worker: Ack callback now properly handles ``AttributeError``. + +* ``Task.after_return`` is now always called *after* the result has been + written. + +* Cassandra Result Backend: Should now work with the latest ``pycassa`` + version. + +* multiprocessing.Pool: No longer cares if the putlock semaphore is released + too many times. (this can happen if one or more worker processes are + killed). + +* SQLAlchemy Result Backend: Now returns accidentally removed ``date_done`` again + (Issue #325). + +* Task.request contex is now always initialized to ensure calling the task + function directly works even if it actively uses the request context. + +* Exception occuring when iterating over the result from ``TaskSet.apply`` + fixed. + +* eventlet: Now properly schedules tasks with an ETA in the past. + +.. 
_version-2.2.4: + +2.2.4 +===== +:release-date: 2011-02-19 00:00 AM CET +:release-by: Ask Solem + +.. _v224-fixes: + +Fixes +----- + +* worker: 2.2.3 broke error logging, resulting in tracebacks not being logged. + +* AMQP result backend: Polling task states did not work properly if there were + more than one result message in the queue. + +* ``TaskSet.apply_async()`` and ``TaskSet.apply()`` now supports an optional + ``taskset_id`` keyword argument (Issue #331). + +* The current taskset id (if any) is now available in the task context as + ``request.taskset`` (Issue #329). + +* SQLAlchemy result backend: `date_done` was no longer part of the results as it had + been accidentally removed. It is now available again (Issue #325). + +* SQLAlchemy result backend: Added unique constraint on `Task.id` and + `TaskSet.taskset_id`. Tables needs to be recreated for this to take effect. + +* Fixed exception raised when iterating on the result of ``TaskSet.apply()``. + +* Tasks Userguide: Added section on choosing a result backend. + +.. _version-2.2.3: + +2.2.3 +===== +:release-date: 2011-02-12 04:00 P.M CET +:release-by: Ask Solem + +.. _v223-fixes: + +Fixes +----- + +* Now depends on Kombu 1.0.3 + +* Task.retry now supports a ``max_retries`` argument, used to change the + default value. + +* `multiprocessing.cpu_count` may raise :exc:`NotImplementedError` on + platforms where this is not supported (Issue #320). + +* Coloring of log messages broke if the logged object was not a string. + +* Fixed several typos in the init script documentation. + +* A regression caused `Task.exchange` and `Task.routing_key` to no longer + have any effect. This is now fixed. + +* Routing Userguide: Fixes typo, routers in :setting:`CELERY_ROUTES` must be + instances, not classes. + +* :program:`celeryev` did not create pidfile even though the + :option:`--pidfile` argument was set. + +* Task logger format was no longer used. (Issue #317). + + The id and name of the task is now part of the log message again. + +* A safe version of ``repr()`` is now used in strategic places to ensure + objects with a broken ``__repr__`` does not crash the worker, or otherwise + make errors hard to understand (Issue #298). + +* Remote control command :control:`active_queues`: did not account for queues added + at runtime. + + In addition the dictionary replied by this command now has a different + structure: the exchange key is now a dictionary containing the + exchange declaration in full. + +* The :option:`-Q` option to :program:`celery worker` removed unused queue + declarations, so routing of tasks could fail. + + Queues are no longer removed, but rather `app.amqp.queues.consume_from()` + is used as the list of queues to consume from. + + This ensures all queues are available for routing purposes. + +* celeryctl: Now supports the `inspect active_queues` command. + +.. _version-2.2.2: + +2.2.2 +===== +:release-date: 2011-02-03 04:00 P.M CET +:release-by: Ask Solem + +.. _v222-fixes: + +Fixes +----- + +* Celerybeat could not read the schedule properly, so entries in + :setting:`CELERYBEAT_SCHEDULE` would not be scheduled. + +* Task error log message now includes `exc_info` again. + +* The `eta` argument can now be used with `task.retry`. + + Previously it was overwritten by the countdown argument. + +* celery multi/celeryd_detach: Now logs errors occuring when executing + the `celery worker` command. 
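+
+The `eta` support in `task.retry` noted above can be exercised like this
+(a sketch only; ``fetch_feed`` and ``open_url`` are hypothetical):
+
+.. code-block:: python
+
+    from datetime import datetime, timedelta
+
+    from celery.task import task
+
+    @task()
+    def fetch_feed(url):
+        try:
+            return open_url(url)    # hypothetical helper that may fail
+        except IOError as exc:
+            # Retry at an absolute time; previously `eta` was overwritten
+            # by the countdown argument.
+            return fetch_feed.retry(
+                exc=exc, eta=datetime.now() + timedelta(minutes=5))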
+ +* daemonizing tutorial: Fixed typo ``--time-limit 300`` -> + ``--time-limit=300`` + +* Colors in logging broke non-string objects in log messages. + +* ``setup_task_logger`` no longer makes assumptions about magic task kwargs. + +.. _version-2.2.1: + +2.2.1 +===== +:release-date: 2011-02-02 04:00 P.M CET +:release-by: Ask Solem + +.. _v221-fixes: + +Fixes +----- + +* Eventlet pool was leaking memory (Issue #308). + +* Deprecated function ``celery.execute.delay_task`` was accidentally removed, + now available again. + +* ``BasePool.on_terminate`` stub did not exist + +* celeryd_detach: Adds readable error messages if user/group name does not + exist. + +* Smarter handling of unicode decod errors when logging errors. + +.. _version-2.2.0: + +2.2.0 +===== +:release-date: 2011-02-01 10:00 AM CET +:release-by: Ask Solem + +.. _v220-important: + +Important Notes +--------------- + +* Carrot has been replaced with `Kombu`_ + + Kombu is the next generation messaging library for Python, + fixing several flaws present in Carrot that was hard to fix + without breaking backwards compatibility. + + Also it adds: + + * First-class support for virtual transports; Redis, Django ORM, + SQLAlchemy, Beanstalk, MongoDB, CouchDB and in-memory. + * Consistent error handling with introspection, + * The ability to ensure that an operation is performed by gracefully + handling connection and channel errors, + * Message compression (zlib, bzip2, or custom compression schemes). + + This means that `ghettoq` is no longer needed as the + functionality it provided is already available in Celery by default. + The virtual transports are also more feature complete with support + for exchanges (direct and topic). The Redis transport even supports + fanout exchanges so it is able to perform worker remote control + commands. + +.. _`Kombu`: http://pypi.python.org/pypi/kombu + +* Magic keyword arguments pending deprecation. + + The magic keyword arguments were responsibile for many problems + and quirks: notably issues with tasks and decorators, and name + collisions in keyword arguments for the unaware. + + It wasn't easy to find a way to deprecate the magic keyword arguments, + but we think this is a solution that makes sense and it will not + have any adverse effects for existing code. + + The path to a magic keyword argument free world is: + + * the `celery.decorators` module is deprecated and the decorators + can now be found in `celery.task`. + * The decorators in `celery.task` disables keyword arguments by + default + * All examples in the documentation have been changed to use + `celery.task`. + + This means that the following will have magic keyword arguments + enabled (old style): + + .. code-block:: python + + from celery.decorators import task + + @task() + def add(x, y, **kwargs): + print("In task %s" % kwargs["task_id"]) + return x + y + + And this will not use magic keyword arguments (new style): + + .. code-block:: python + + from celery.task import task + + @task() + def add(x, y): + print("In task %s" % add.request.id) + return x + y + + In addition, tasks can choose not to accept magic keyword arguments by + setting the `task.accept_magic_kwargs` attribute. + + .. 
admonition:: Deprecation + + Using the decorators in :mod:`celery.decorators` emits a + :class:`PendingDeprecationWarning` with a helpful message urging + you to change your code, in version 2.4 this will be replaced with + a :class:`DeprecationWarning`, and in version 4.0 the + :mod:`celery.decorators` module will be removed and no longer exist. + + Similarly, the `task.accept_magic_kwargs` attribute will no + longer have any effect starting from version 4.0. + +* The magic keyword arguments are now available as `task.request` + + This is called *the context*. Using thread-local storage the + context contains state that is related to the current request. + + It is mutable and you can add custom attributes that will only be seen + by the current task request. + + The following context attributes are always available: + + ===================================== =================================== + **Magic Keyword Argument** **Replace with** + ===================================== =================================== + `kwargs["task_id"]` `self.request.id` + `kwargs["delivery_info"]` `self.request.delivery_info` + `kwargs["task_retries"]` `self.request.retries` + `kwargs["logfile"]` `self.request.logfile` + `kwargs["loglevel"]` `self.request.loglevel` + `kwargs["task_is_eager` `self.request.is_eager` + **NEW** `self.request.args` + **NEW** `self.request.kwargs` + ===================================== =================================== + + In addition, the following methods now automatically uses the current + context, so you don't have to pass `kwargs` manually anymore: + + * `task.retry` + * `task.get_logger` + * `task.update_state` + +* `Eventlet`_ support. + + This is great news for I/O-bound tasks! + + To change pool implementations you use the :option:`-P|--pool` argument + to :program:`celery worker`, or globally using the + :setting:`CELERYD_POOL` setting. This can be the full name of a class, + or one of the following aliases: `processes`, `eventlet`, `gevent`. + + For more information please see the :ref:`concurrency-eventlet` section + in the User Guide. + + .. admonition:: Why not gevent? + + For our first alternative concurrency implementation we have focused + on `Eventlet`_, but there is also an experimental `gevent`_ pool + available. This is missing some features, notably the ability to + schedule ETA tasks. + + Hopefully the `gevent`_ support will be feature complete by + version 2.3, but this depends on user demand (and contributions). + +.. _`Eventlet`: http://eventlet.net +.. _`gevent`: http://gevent.org + +* Python 2.4 support deprecated! + + We're happy^H^H^H^H^Hsad to announce that this is the last version + to support Python 2.4. + + You are urged to make some noise if you're currently stuck with + Python 2.4. Complain to your package maintainers, sysadmins and bosses: + tell them it's time to move on! + + Apart from wanting to take advantage of with-statements, coroutines, + conditional expressions and enhanced try blocks, the code base + now contains so many 2.4 related hacks and workarounds it's no longer + just a compromise, but a sacrifice. + + If it really isn't your choice, and you don't have the option to upgrade + to a newer version of Python, you can just continue to use Celery 2.2. + Important fixes can be backported for as long as there is interest. + +* worker: Now supports Autoscaling of child worker processes. 
+ + The :option:`--autoscale` option can be used to configure the minimum + and maximum number of child worker processes:: + + --autoscale=AUTOSCALE + Enable autoscaling by providing + max_concurrency,min_concurrency. Example: + --autoscale=10,3 (always keep 3 processes, but grow to + 10 if necessary). + +* Remote Debugging of Tasks + + ``celery.contrib.rdb`` is an extended version of :mod:`pdb` that + enables remote debugging of processes that does not have terminal + access. + + Example usage: + + .. code-block:: python + + from celery.contrib import rdb + from celery.task import task + + @task() + def add(x, y): + result = x + y + rdb.set_trace() # <- set breakpoint + return result + + + :func:`~celery.contrib.rdb.set_trace` sets a breakpoint at the current + location and creates a socket you can telnet into to remotely debug + your task. + + The debugger may be started by multiple processes at the same time, + so rather than using a fixed port the debugger will search for an + available port, starting from the base port (6900 by default). + The base port can be changed using the environment variable + :envvar:`CELERY_RDB_PORT`. + + By default the debugger will only be available from the local host, + to enable access from the outside you have to set the environment + variable :envvar:`CELERY_RDB_HOST`. + + When the worker encounters your breakpoint it will log the following + information:: + + [INFO/MainProcess] Received task: + tasks.add[d7261c71-4962-47e5-b342-2448bedd20e8] + [WARNING/PoolWorker-1] Remote Debugger:6900: + Please telnet 127.0.0.1 6900. Type `exit` in session to continue. + [2011-01-18 14:25:44,119: WARNING/PoolWorker-1] Remote Debugger:6900: + Waiting for client... + + If you telnet the port specified you will be presented + with a ``pdb`` shell: + + .. code-block:: bash + + $ telnet localhost 6900 + Connected to localhost. + Escape character is '^]'. + > /opt/devel/demoapp/tasks.py(128)add() + -> return result + (Pdb) + + Enter ``help`` to get a list of available commands, + It may be a good idea to read the `Python Debugger Manual`_ if + you have never used `pdb` before. + +.. _`Python Debugger Manual`: http://docs.python.org/library/pdb.html + + +* Events are now transient and is using a topic exchange (instead of direct). + + The `CELERYD_EVENT_EXCHANGE`, `CELERYD_EVENT_ROUTING_KEY`, + `CELERYD_EVENT_EXCHANGE_TYPE` settings are no longer in use. + + This means events will not be stored until there is a consumer, and the + events will be gone as soon as the consumer stops. Also it means there + can be multiple monitors running at the same time. + + The routing key of an event is the type of event (e.g. `worker.started`, + `worker.heartbeat`, `task.succeeded`, etc. This means a consumer can + filter on specific types, to only be alerted of the events it cares about. + + Each consumer will create a unique queue, meaning it is in effect a + broadcast exchange. + + This opens up a lot of possibilities, for example the workers could listen + for worker events to know what workers are in the neighborhood, and even + restart workers when they go down (or use this information to optimize + tasks/autoscaling). + + .. note:: + + The event exchange has been renamed from "celeryevent" to "celeryev" + so it does not collide with older versions. + + If you would like to remove the old exchange you can do so + by executing the following command: + + .. 
code-block:: bash + + $ camqadm exchange.delete celeryevent + +* The worker now starts without configuration, and configuration can be + specified directly on the command-line. + + Configuration options must appear after the last argument, separated + by two dashes: + + .. code-block:: bash + + $ celery worker -l info -I tasks -- broker.host=localhost broker.vhost=/app + +* Configuration is now an alias to the original configuration, so changes + to the original will reflect Celery at runtime. + +* `celery.conf` has been deprecated, and modifying `celery.conf.ALWAYS_EAGER` + will no longer have any effect. + + The default configuration is now available in the + :mod:`celery.app.defaults` module. The available configuration options + and their types can now be introspected. + +* Remote control commands are now provided by `kombu.pidbox`, the generic + process mailbox. + +* Internal module `celery.worker.listener` has been renamed to + `celery.worker.consumer`, and `.CarrotListener` is now `.Consumer`. + +* Previously deprecated modules `celery.models` and + `celery.management.commands` have now been removed as per the deprecation + timeline. + +* [Security: Low severity] Removed `celery.task.RemoteExecuteTask` and + accompanying functions: `dmap`, `dmap_async`, and `execute_remote`. + + Executing arbitrary code using pickle is a potential security issue if + someone gains unrestricted access to the message broker. + + If you really need this functionality, then you would have to add + this to your own project. + +* [Security: Low severity] The `stats` command no longer transmits the + broker password. + + One would have needed an authenticated broker connection to receive + this password in the first place, but sniffing the password at the + wire level would have been possible if using unencrypted communication. + +.. _v220-news: + +News +---- + +* The internal module `celery.task.builtins` has been removed. + +* The module `celery.task.schedules` is deprecated, and + `celery.schedules` should be used instead. + + For example if you have:: + + from celery.task.schedules import crontab + + You should replace that with:: + + from celery.schedules import crontab + + The module needs to be renamed because it must be possible + to import schedules without importing the `celery.task` module. + +* The following functions have been deprecated and is scheduled for + removal in version 2.3: + + * `celery.execute.apply_async` + + Use `task.apply_async()` instead. + + * `celery.execute.apply` + + Use `task.apply()` instead. + + * `celery.execute.delay_task` + + Use `registry.tasks[name].delay()` instead. + +* Importing `TaskSet` from `celery.task.base` is now deprecated. + + You should use:: + + >>> from celery.task import TaskSet + + instead. + +* New remote control commands: + + * `active_queues` + + Returns the queue declarations a worker is currently consuming from. + +* Added the ability to retry publishing the task message in + the event of connection loss or failure. + + This is disabled by default but can be enabled using the + :setting:`CELERY_TASK_PUBLISH_RETRY` setting, and tweaked by + the :setting:`CELERY_TASK_PUBLISH_RETRY_POLICY` setting. + + In addition `retry`, and `retry_policy` keyword arguments have + been added to `Task.apply_async`. + + .. note:: + + Using the `retry` argument to `apply_async` requires you to + handle the publisher/connection manually. 
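+
+  A configuration sketch enabling publish retries globally (the policy
+  values shown are illustrative only):
+
+  .. code-block:: python
+
+      CELERY_TASK_PUBLISH_RETRY = True
+      CELERY_TASK_PUBLISH_RETRY_POLICY = {
+          "max_retries": 3,       # give up after three attempts
+          "interval_start": 0,    # retry immediately the first time
+          "interval_step": 0.2,   # then back off 200ms per retry
+          "interval_max": 1,      # but never wait more than one second
+      }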
+ +* Periodic Task classes (`@periodic_task`/`PeriodicTask`) will *not* be + deprecated as previously indicated in the source code. + + But you are encouraged to use the more flexible + :setting:`CELERYBEAT_SCHEDULE` setting. + +* Built-in daemonization support of the worker using `celery multi` + is no longer experimental and is considered production quality. + + See :ref:`daemon-generic` if you want to use the new generic init + scripts. + +* Added support for message compression using the + :setting:`CELERY_MESSAGE_COMPRESSION` setting, or the `compression` argument + to `apply_async`. This can also be set using routers. + +* worker: Now logs stacktrace of all threads when receiving the + `SIGUSR1` signal. (Does not work on cPython 2.4, Windows or Jython). + + Inspired by https://gist.github.com/737056 + +* Can now remotely terminate/kill the worker process currently processing + a task. + + The `revoke` remote control command now supports a `terminate` argument + Default signal is `TERM`, but can be specified using the `signal` + argument. Signal can be the uppercase name of any signal defined + in the :mod:`signal` module in the Python Standard Library. + + Terminating a task also revokes it. + + Example:: + + >>> from celery.task.control import revoke + + >>> revoke(task_id, terminate=True) + >>> revoke(task_id, terminate=True, signal="KILL") + >>> revoke(task_id, terminate=True, signal="SIGKILL") + +* `TaskSetResult.join_native`: Backend-optimized version of `join()`. + + If available, this version uses the backends ability to retrieve + multiple results at once, unlike `join()` which fetches the results + one by one. + + So far only supported by the AMQP result backend. Support for memcached + and Redis may be added later. + +* Improved implementations of `TaskSetResult.join` and `AsyncResult.wait`. + + An `interval` keyword argument have been added to both so the + polling interval can be specified (default interval is 0.5 seconds). + + A `propagate` keyword argument have been added to `result.wait()`, + errors will be returned instead of raised if this is set to False. + + .. warning:: + + You should decrease the polling interval when using the database + result backend, as frequent polling can result in high database load. + + +* The PID of the child worker process accepting a task is now sent as a field + with the :event:`task-started` event. + +* The following fields have been added to all events in the worker class: + + * `sw_ident`: Name of worker software (e.g. py-celery). + * `sw_ver`: Software version (e.g. 2.2.0). + * `sw_sys`: Operating System (e.g. Linux, Windows, Darwin). + +* For better accuracy the start time reported by the multiprocessing worker + process is used when calculating task duration. + + Previously the time reported by the accept callback was used. + +* `celerybeat`: New built-in daemonization support using the `--detach` + option. + +* `celeryev`: New built-in daemonization support using the `--detach` + option. + +* `TaskSet.apply_async`: Now supports custom publishers by using the + `publisher` argument. + +* Added :setting:`CELERY_SEND_TASK_SENT_EVENT` setting. + + If enabled an event will be sent with every task, so monitors can + track tasks before the workers receive them. + +* `celerybeat`: Now reuses the broker connection when calling + scheduled tasks. + +* The configuration module and loader to use can now be specified on + the command-line. + + For example: + + .. 
code-block:: bash + + $ celery worker --config=celeryconfig.py --loader=myloader.Loader + +* Added signals: `beat_init` and `beat_embedded_init` + + * :signal:`celery.signals.beat_init` + + Dispatched when :program:`celerybeat` starts (either standalone or + embedded). Sender is the :class:`celery.beat.Service` instance. + + * :signal:`celery.signals.beat_embedded_init` + + Dispatched in addition to the :signal:`beat_init` signal when + :program:`celerybeat` is started as an embedded process. Sender + is the :class:`celery.beat.Service` instance. + +* Redis result backend: Removed deprecated settings `REDIS_TIMEOUT` and + `REDIS_CONNECT_RETRY`. + +* CentOS init script for :program:`celery worker` now available in `extra/centos`. + +* Now depends on `pyparsing` version 1.5.0 or higher. + + There have been reported issues using Celery with pyparsing 1.4.x, + so please upgrade to the latest version. + +* Lots of new unit tests written, now with a total coverage of 95%. + +.. _v220-fixes: + +Fixes +----- + +* `celeryev` Curses Monitor: Improved resize handling and UI layout + (Issue #274 + Issue #276) + +* AMQP Backend: Exceptions occurring while sending task results are now + propagated instead of silenced. + + the worker will then show the full traceback of these errors in the log. + +* AMQP Backend: No longer deletes the result queue after successful + poll, as this should be handled by the + :setting:`CELERY_AMQP_TASK_RESULT_EXPIRES` setting instead. + +* AMQP Backend: Now ensures queues are declared before polling results. + +* Windows: worker: Show error if running with `-B` option. + + Running celerybeat embedded is known not to work on Windows, so + users are encouraged to run celerybeat as a separate service instead. + +* Windows: Utilities no longer output ANSI color codes on Windows + +* camqadm: Now properly handles Ctrl+C by simply exiting instead of showing + confusing traceback. + +* Windows: All tests are now passing on Windows. + +* Remove bin/ directory, and `scripts` section from setup.py. + + This means we now rely completely on setuptools entrypoints. + +.. _v220-experimental: + +Experimental +------------ + +* Jython: worker now runs on Jython using the threaded pool. + + All tests pass, but there may still be bugs lurking around the corners. + +* PyPy: worker now runs on PyPy. + + It runs without any pool, so to get parallel execution you must start + multiple instances (e.g. using :program:`multi`). + + Sadly an initial benchmark seems to show a 30% performance decrease on + pypy-1.4.1 + JIT. We would like to find out why this is, so stay tuned. + +* :class:`PublisherPool`: Experimental pool of task publishers and + connections to be used with the `retry` argument to `apply_async`. + + The example code below will re-use connections and channels, and + retry sending of the task message if the connection is lost. + + .. code-block:: python + + from celery import current_app + + # Global pool + pool = current_app().amqp.PublisherPool(limit=10) + + def my_view(request): + with pool.acquire() as publisher: + add.apply_async((2, 2), publisher=publisher, retry=True) + + + diff --git a/docs/history/changelog-2.3.rst b/docs/history/changelog-2.3.rst new file mode 100644 index 0000000..90a4454 --- /dev/null +++ b/docs/history/changelog-2.3.rst @@ -0,0 +1,370 @@ +.. _changelog-2.3: + +=============================== + Change history for Celery 2.3 +=============================== + +.. contents:: + :local: + +.. 
_version-2.3.4: + +2.3.4 +===== +:release-date: 2011-11-25 04:00 P.M GMT +:release-by: Ask Solem + +.. _v234-security-fixes: + +Security Fixes +-------------- + +* [Security: `CELERYSA-0001`_] Daemons would set effective id's rather than + real id's when the :option:`--uid`/:option:`--gid` arguments to + :program:`celery multi`, :program:`celeryd_detach`, + :program:`celery beat` and :program:`celery events` were used. + + This means privileges weren't properly dropped, and that it would + be possible to regain supervisor privileges later. + + +.. _`CELERYSA-0001`: + http://github.com/celery/celery/tree/master/docs/sec/CELERYSA-0001.txt + +Fixes +----- + +* Backported fix for #455 from 2.4 to 2.3. + +* Statedb was not saved at shutdown. + +* Fixes worker sometimes hanging when hard time limit exceeded. + + +.. _version-2.3.3: + +2.3.3 +===== +:release-date: 2011-16-09 05:00 P.M BST +:release-by: Mher Movsisyan + +* Monkey patching :attr:`sys.stdout` could result in the worker + crashing if the replacing object did not define :meth:`isatty` + (Issue #477). + +* ``CELERYD`` option in :file:`/etc/default/celeryd` should not + be used with generic init scripts. + + +.. _version-2.3.2: + +2.3.2 +===== +:release-date: 2011-10-07 05:00 P.M BST +:release-by: Ask Solem + +.. _v232-news: + +News +---- + +* Improved Contributing guide. + + If you'd like to contribute to Celery you should read the + :ref:`Contributing Gudie `. + + We are looking for contributors at all skill levels, so don't + hesitate! + +* Now depends on Kombu 1.3.1 + +* ``Task.request`` now contains the current worker host name (Issue #460). + + Available as ``task.request.hostname``. + +* It is now easier for app subclasses to extend how they are pickled. + (see :class:`celery.app.AppPickler`). + +.. _v232-fixes: + +Fixes +----- + +* `purge/discard_all` was not working correctly (Issue #455). + +* The coloring of log messages didn't handle non-ASCII data well + (Issue #427). + +* [Windows] the multiprocessing pool tried to import ``os.kill`` + even though this is not available there (Issue #450). + +* Fixes case where the worker could become unresponsive because of tasks + exceeding the hard time limit. + +* The :event:`task-sent` event was missing from the event reference. + +* ``ResultSet.iterate`` now returns results as they finish (Issue #459). + + This was not the case previously, even though the documentation + states this was the expected behavior. + +* Retries will no longer be performed when tasks are called directly + (using ``__call__``). + + Instead the exception passed to ``retry`` will be re-raised. + +* Eventlet no longer crashes if autoscale is enabled. + + growing and shrinking eventlet pools is still not supported. + +* py24 target removed from :file:`tox.ini`. + + +.. _version-2.3.1: + +2.3.1 +===== +:release-date: 2011-08-07 08:00 P.M BST +:release-by: Ask Solem + +Fixes +----- + +* The :setting:`CELERY_AMQP_TASK_RESULT_EXPIRES` setting did not work, + resulting in an AMQP related error about not being able to serialize + floats while trying to publish task states (Issue #446). + +.. _version-2.3.0: + +2.3.0 +===== +:release-date: 2011-08-05 12:00 P.M BST +:tested: cPython: 2.5, 2.6, 2.7; PyPy: 1.5; Jython: 2.5.2 +:release-by: Ask Solem + +.. _v230-important: + +Important Notes +--------------- + +* Now requires Kombu 1.2.1 + +* Results are now disabled by default. + + The AMQP backend was not a good default because often the users were + not consuming the results, resulting in thousands of queues. 
+ + While the queues can be configured to expire if left unused, it was not + possible to enable this by default because this was only available in + recent RabbitMQ versions (2.1.1+) + + With this change enabling a result backend will be a conscious choice, + which will hopefully lead the user to read the documentation and be aware + of any common pitfalls with the particular backend. + + The default backend is now a dummy backend + (:class:`celery.backends.base.DisabledBackend`). Saving state is simply an + noop operation, and AsyncResult.wait(), .result, .state, etc. will raise + a :exc:`NotImplementedError` telling the user to configure the result backend. + + For help choosing a backend please see :ref:`task-result-backends`. + + If you depend on the previous default which was the AMQP backend, then + you have to set this explicitly before upgrading:: + + CELERY_RESULT_BACKEND = "amqp" + + .. note:: + + For django-celery users the default backend is still ``database``, + and results are not disabled by default. + +* The Debian init scripts have been deprecated in favor of the generic-init.d + init scripts. + + In addition generic init scripts for celerybeat and celeryev has been + added. + +.. _v230-news: + +News +---- + +* Automatic connection pool support. + + The pool is used by everything that requires a broker connection. For + example calling tasks, sending broadcast commands, retrieving results + with the AMQP result backend, and so on. + + The pool is disabled by default, but you can enable it by configuring the + :setting:`BROKER_POOL_LIMIT` setting:: + + BROKER_POOL_LIMIT = 10 + + A limit of 10 means a maximum of 10 simultaneous connections can co-exist. + Only a single connection will ever be used in a single-thread + environment, but in a concurrent environment (threads, greenlets, etc., but + not processes) when the limit has been exceeded, any try to acquire a + connection will block the thread and wait for a connection to be released. + This is something to take into consideration when choosing a limit. + + A limit of :const:`None` or 0 means no limit, and connections will be + established and closed every time. + +* Introducing Chords (taskset callbacks). + + A chord is a task that only executes after all of the tasks in a taskset + has finished executing. It's a fancy term for "taskset callbacks" + adopted from + `Cω `_). + + It works with all result backends, but the best implementation is + currently provided by the Redis result backend. + + Here's an example chord:: + + >>> chord(add.subtask((i, i)) + ... for i in xrange(100))(tsum.subtask()).get() + 9900 + + Please read the :ref:`Chords section in the user guide `, if you + want to know more. + +* Time limits can now be set for individual tasks. + + To set the soft and hard time limits for a task use the ``time_limit`` + and ``soft_time_limit`` attributes: + + .. code-block:: python + + import time + + @task(time_limit=60, soft_time_limit=30) + def sleeptask(seconds): + time.sleep(seconds) + + If the attributes are not set, then the workers default time limits + will be used. + + New in this version you can also change the time limits for a task + at runtime using the :func:`time_limit` remote control command:: + + >>> from celery.task import control + >>> control.time_limit("tasks.sleeptask", + ... soft=60, hard=120, reply=True) + [{'worker1.example.com': {'ok': 'time limits set successfully'}}] + + Only tasks that starts executing after the time limit change will be affected. + + .. 
note:: + + Soft time limits will still not work on Windows or other platforms + that do not have the ``SIGUSR1`` signal. + +* Redis backend configuration directive names changed to include the + ``CELERY_`` prefix. + + + ===================================== =================================== + **Old setting name** **Replace with** + ===================================== =================================== + `REDIS_HOST` `CELERY_REDIS_HOST` + `REDIS_PORT` `CELERY_REDIS_PORT` + `REDIS_DB` `CELERY_REDIS_DB` + `REDIS_PASSWORD` `CELERY_REDIS_PASSWORD` + ===================================== =================================== + + The old names are still supported but pending deprecation. + +* PyPy: The default pool implementation used is now multiprocessing + if running on PyPy 1.5. + +* multi: now supports "pass through" options. + + Pass through options makes it easier to use celery without a + configuration file, or just add last-minute options on the command + line. + + Example use: + + .. code-block:: bash + + $ celery multi start 4 -c 2 -- broker.host=amqp.example.com \ + broker.vhost=/ \ + celery.disable_rate_limits=yes + +* celerybeat: Now retries establishing the connection (Issue #419). + +* celeryctl: New ``list bindings`` command. + + Lists the current or all available bindings, depending on the + broker transport used. + +* Heartbeat is now sent every 30 seconds (previously every 2 minutes). + +* ``ResultSet.join_native()`` and ``iter_native()`` is now supported by + the Redis and Cache result backends. + + This is an optimized version of ``join()`` using the underlying + backends ability to fetch multiple results at once. + +* Can now use SSL when sending error e-mails by enabling the + :setting:`EMAIL_USE_SSL` setting. + +* ``events.default_dispatcher()``: Context manager to easily obtain + an event dispatcher instance using the connection pool. + +* Import errors in the configuration module will not be silenced anymore. + +* ResultSet.iterate: Now supports the ``timeout``, ``propagate`` and + ``interval`` arguments. + +* ``with_default_connection`` -> ``with default_connection`` + +* TaskPool.apply_async: Keyword arguments ``callbacks`` and ``errbacks`` + has been renamed to ``callback`` and ``errback`` and take a single scalar + value instead of a list. + +* No longer propagates errors occurring during process cleanup (Issue #365) + +* Added ``TaskSetResult.delete()``, which will delete a previously + saved taskset result. + +* Celerybeat now syncs every 3 minutes instead of only at + shutdown (Issue #382). + +* Monitors now properly handles unknown events, so user-defined events + are displayed. + +* Terminating a task on Windows now also terminates all of the tasks child + processes (Issue #384). + +* worker: ``-I|--include`` option now always searches the current directory + to import the specified modules. + +* Cassandra backend: Now expires results by using TTLs. + +* Functional test suite in ``funtests`` is now actually working properly, and + passing tests. + +.. _v230-fixes: + +Fixes +----- + +* celeryev was trying to create the pidfile twice. + +* celery.contrib.batches: Fixed problem where tasks failed + silently (Issue #393). + +* Fixed an issue where logging objects would give " celery.concurrency.eventlet + celery.concurrency.evg -> celery.concurrency.gevent + +* AUTHORS file is now sorted alphabetically. + + Also, as you may have noticed the contributors of new features/fixes are + now mentioned in the Changelog. 
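+
+As a quick reference for the Redis setting rename in 2.3.0 above, a minimal
+result-backend configuration sketch (host, port and db values are
+placeholders):
+
+.. code-block:: python
+
+    CELERY_RESULT_BACKEND = "redis"
+    CELERY_REDIS_HOST = "localhost"   # was: REDIS_HOST
+    CELERY_REDIS_PORT = 6379          # was: REDIS_PORT
+    CELERY_REDIS_DB = 0               # was: REDIS_DB
+    CELERY_REDIS_PASSWORD = None      # was: REDIS_PASSWORD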
diff --git a/docs/history/changelog-2.5.rst b/docs/history/changelog-2.5.rst new file mode 100644 index 0000000..fa395a2 --- /dev/null +++ b/docs/history/changelog-2.5.rst @@ -0,0 +1,214 @@ +.. _changelog-2.5: + +=============================== + Change history for Celery 2.5 +=============================== + +This document contains change notes for bugfix releases in the 2.5.x series, +please see :ref:`whatsnew-2.5` for an overview of what's +new in Celery 2.5. + +If you're looking for versions prior to 2.5 you should visit our +:ref:`history` of releases. + +.. contents:: + :local: + +.. _version-2.5.5: + +2.5.5 +===== +:release-date: 2012-06-06 04:00 P.M BST +:release-by: Ask Solem + +This is a dummy release performed for the following goals: + +- Protect against force upgrading to Kombu 2.2.0 +- Version parity with django-celery + +.. _version-2.5.3: + +2.5.3 +===== +:release-date: 2012-04-16 07:00 P.M BST +:release-by: Ask Solem + +* A bug causes messages to be sent with UTC timestamps even though + :setting:`CELERY_ENABLE_UTC` was not enabled (Issue #636). + +* celerybeat: No longer crashes if an entry's args is set to None + (Issue #657). + +* Autoreload did not work if a module's ``__file__`` attribute + was set to the modules '.pyc' file. (Issue #647). + +* Fixes early 2.5 compatibility where __package__ does not exist + (Issue #638). + +.. _version-2.5.2: + +2.5.2 +===== +:release-date: 2012-04-13 04:30 P.M GMT +:release-by: Ask Solem + +.. _v252-news: + +News +---- + +- Now depends on Kombu 2.1.5. + +- Django documentation has been moved to the main Celery docs. + + See :ref:`django`. + +- New :signal:`celeryd_init` signal can be used to configure workers + by hostname. + +- Signal.connect can now be used as a decorator. + + Example: + + .. code-block:: python + + from celery.signals import task_sent + + @task_sent.connect + def on_task_sent(**kwargs): + print("sent task: %r" % (kwargs, )) + +- Invalid task messages are now rejected instead of acked. + + This means that they will be moved to the dead-letter queue + introduced in the latest RabbitMQ version (but must be enabled + manually, consult the RabbitMQ documentation). + +- Internal logging calls has been cleaned up to work + better with tools like Sentry. + + Contributed by David Cramer. + +- New method ``subtask.clone()`` can be used to clone an existing + subtask with augmented arguments/options. + + Example: + + .. code-block:: python + + >>> s = add.subtask((5, )) + >>> new = s.clone(args=(10, ), countdown=5}) + >>> new.args + (10, 5) + + >>> new.options + {"countdown": 5} + +- Chord callbacks are now triggered in eager mode. + +.. _v252-fixes: + +Fixes +----- + +- Programs now verifies that the pidfile is actually written correctly + (Issue #641). + + Hopefully this will crash the worker immediately if the system + is out of space to store the complete pidfile. + + In addition, we now verify that existing pidfiles contain + a new line so that a partially written pidfile is detected as broken, + as before doing: + + echo -n "1" > celeryd.pid + + would cause the worker to think that an existing instance was already + running (init has pid 1 after all). + +- Fixed 2.5 compatibility issue with use of print_exception. + + Fix contributed by Martin Melin. + +- Fixed 2.5 compatibility issue with imports. + + Fix contributed by Iurii Kriachko. + +- All programs now fix up ``__package__`` when called as main. + + This fixes compatibility with Python 2.5. + + Fix contributed by Martin Melin. 
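+
+The :signal:`celeryd_init` signal and the decorator form of ``connect``
+mentioned in the news above can be combined like this (a sketch; the host
+name and the setting being changed are only placeholders):
+
+.. code-block:: python
+
+    from celery.signals import celeryd_init
+
+    @celeryd_init.connect
+    def configure_workers(sender=None, conf=None, **kwargs):
+        # `sender` is the worker host name; tweak settings per host.
+        if sender == "worker1.example.com":
+            conf.CELERYD_PREFETCH_MULTIPLIER = 1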
+ +- [celery control|inspect] can now be configured on the command-line. + + Like with the worker it is now possible to configure celery settings + on the command-line for celery control|inspect + + .. code-block:: bash + + $ celery inspect -- broker.pool_limit=30 + +- Version dependency for python-dateutil fixed to be strict. + + Fix contributed by Thomas Meson. + +- ``Task.__call__`` is now optimized away in the task tracer + rather than when the task class is created. + + This fixes a bug where a custom __call__ may mysteriously disappear. + +- Autoreload's inotify support has been improved. + + Contributed by Mher Movsisyan. + +- The Django broker documentation has been improved. + +- Removed confusing warning at top of routing user guide. + +.. _version-2.5.1: + +2.5.1 +===== +:release-date: 2012-03-01 01:00 P.M GMT +:release-by: Ask Solem + +.. _v251-fixes: + +Fixes +----- + +* Eventlet/Gevent: A small typo caused the worker to hang when eventlet/gevent + was used, this was because the environment was not monkey patched + early enough. + +* Eventlet/Gevent: Another small typo caused the mediator to be started + with eventlet/gevent, which would make the worker sometimes hang at shutdown. + +* Mulitprocessing: Fixed an error occurring if the pool was stopped + before it was properly started. + +* Proxy objects now redirects ``__doc__`` and ``__name__`` so ``help(obj)`` + works. + +* Internal timer (timer2) now logs exceptions instead of swallowing them + (Issue #626). + +* celery shell: can now be started with :option:`--eventlet` or + :option:`--gevent` options to apply their monkey patches. + +.. _version-2.5.0: + +2.5.0 +===== +:release-date: 2012-02-24 04:00 P.M GMT +:release-by: Ask Solem + +See :ref:`whatsnew-2.5`. + +Since the changelog has gained considerable size, we decided to +do things differently this time: by having separate "what's new" +documents for major version changes. + +Bugfix releases will still be found in the changelog. + diff --git a/docs/history/changelog-3.0.rst b/docs/history/changelog-3.0.rst new file mode 100644 index 0000000..76994ed --- /dev/null +++ b/docs/history/changelog-3.0.rst @@ -0,0 +1,1600 @@ +.. _changelog-3.0: + +=============================== + Change history for Celery 3.0 +=============================== + +.. contents:: + :local: + +If you're looking for versions prior to 3.0.x you should go to :ref:`history`. + +.. _version-3.0.24: + +3.0.24 +====== +:release-date: 2013-10-11 04:40 P.M BST +:release-by: Ask Solem + +- Now depends on :ref:`Kombu 2.5.15 `. + +- Now depends on :mod:`billiard` version 2.7.3.34. + +- AMQP Result backend: No longer caches queue declarations. + + The queues created by the AMQP result backend are always unique, + so caching the declarations caused a slow memory leak. + +- Worker: Fixed crash when hostname contained Unicode characters. + + Contributed by Daodao. + +- The worker would no longer start if the `-P solo` pool was selected + (Issue #1548). + +- Redis/Cache result backends would not complete chords + if any of the tasks were retried (Issue #1401). + +- Task decorator is no longer lazy if app is finalized. + +- AsyncResult: Fixed bug with ``copy(AsyncResult)`` when no + ``current_app`` available. + +- ResultSet: Now properly propagates app when passed string id's. + +- Loader now ignores :envvar:`CELERY_CONFIG_MODULE` if value is empty string. + +- Fixed race condition in Proxy object where it tried to + delete an attribute twice, resulting in :exc:`AttributeError`. 
+ +- Task methods now works with the :setting:`CELERY_ALWAYS_EAGER` setting + (Issue #1478). + +- :class:`~kombu.common.Broadcast` queues were accidentally declared + when publishing tasks (Issue #1540). + +- New :envvar:`C_FAKEFORK` environment variable can be used to + debug the init scripts. + + Setting this will skip the daemonization step so that errors + printed to stderr after standard outs are closed can be seen:: + + $ C_FAKEFORK /etc/init.d/celeryd start + + This works with the `celery multi` command in general. + +- ``get_pickleable_etype`` did not always return a value (Issue #1556). +- Fixed bug where ``app.GroupResult.restore`` would fall back to the default + app. + +- Fixed rare bug where built-in tasks would use the current_app. + +- :func:`~celery.platforms.maybe_fileno` now handles :exc:`ValueError`. + +.. _version-3.0.23: + +3.0.23 +====== +:release-date: 2013-09-02 01:00 P.M BST +:release-by: Ask Solem + +- Now depends on :ref:`Kombu 2.5.14 `. + +- ``send_task`` did not honor ``link`` and ``link_error`` arguments. + + This had the side effect of chains not calling unregistered tasks, + silently discarding them. + + Fix contributed by Taylor Nelson. + +- :mod:`celery.state`: Optimized precedence lookup. + + Contributed by Matt Robenolt. + +- Posix: Daemonization did not redirect ``sys.stdin`` to ``/dev/null``. + + Fix contributed by Alexander Smirnov. + +- Canvas: group bug caused fallback to default app when ``.apply_async`` used + (Issue #1516) + +- Canvas: generator arguments was not always pickleable. + +.. _version-3.0.22: + +3.0.22 +====== +:release-date: 2013-08-16 04:30 P.M BST +:release-by: Ask Solem + +- Now depends on :ref:`Kombu 2.5.13 `. + +- Now depends on :mod:`billiard` 2.7.3.32 + +- Fixed bug with monthly and yearly crontabs (Issue #1465). + + Fix contributed by Guillaume Gauvrit. + +- Fixed memory leak caused by time limits (Issue #1129, Issue #1427) + +- Worker will now sleep if being restarted more than 5 times + in one second to avoid spamming with ``worker-online`` events. + +- Includes documentation fixes + + Contributed by: Ken Fromm, Andreas Savvides, Alex Kiriukha, + Michael Fladischer. + +.. _version-3.0.21: + +3.0.21 +====== +:release-date: 2013-07-05 04:30 P.M BST +:release-by: Ask Solem + +- Now depends on :mod:`billiard` 2.7.3.31. + + This version fixed a bug when running without the billiard C extension. + +- 3.0.20 broke eventlet/gevent support (worker not starting). + +- Fixed memory leak problem when MongoDB result backend was used with the + gevent pool. + + Fix contributed by Ross Lawley. + +.. _version-3.0.20: + +3.0.20 +====== +:release-date: 2013-06-28 04:00 P.M BST +:release-by: Ask Solem + +- Contains workaround for deadlock problems. + + A better solution will be part of Celery 3.1. + +- Now depends on :ref:`Kombu 2.5.12 `. + +- Now depends on :mod:`billiard` 2.7.3.30. + +- ``--loader`` argument no longer supported importing loaders from the + current directory. + +- [Worker] Fixed memory leak when restarting after connection lost + (Issue #1325). + +- [Worker] Fixed UnicodeDecodeError at startup (Issue #1373). + + Fix contributed by Jessica Tallon. + +- [Worker] Now properly rewrites unpickleable exceptions again. + +- Fixed possible race condition when evicting items from the revoked task set. + +- [generic-init.d] Fixed compatibility with Ubuntu's minimal Dash + shell (Issue #1387). + + Fix contributed by monkut. + +- ``Task.apply``/``ALWAYS_EAGER`` now also executes callbacks and errbacks + (Issue #1336). 
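+
+  A hedged sketch of what this enables when running eagerly (the task names
+  are only illustrative):
+
+  .. code-block:: python
+
+      from celery import Celery
+
+      app = Celery('example')
+      app.conf.CELERY_ALWAYS_EAGER = True
+
+      @app.task
+      def add(x, y):
+          return x + y
+
+      @app.task
+      def log_result(value):
+          print('callback got %r' % (value,))
+
+      # The linked callback is now also executed when the task
+      # runs eagerly via apply()/ALWAYS_EAGER.
+      add.apply_async((2, 2), link=log_result.s())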
+ +- [Worker] The :signal:`worker-shutdown` signal was no longer being dispatched + (Issue #1339)j + +- [Python 3] Fixed problem with threading.Event. + + Fix contributed by Xavier Ordoquy. + +- [Python 3] Now handles ``io.UnsupportedOperation`` that may be raised + by ``file.fileno()`` in Python 3. + +- [Python 3] Fixed problem with qualname. + +- [events.State] Now ignores unknown event-groups. + +- [MongoDB backend] No longer uses deprecated ``safe`` parameter. + + Fix contributed by rfkrocktk + +- The eventlet pool now imports on Windows. + +- [Canvas] Fixed regression where immutable chord members may receive + arguments (Issue #1340). + + Fix contributed by Peter Brook. + +- [Canvas] chain now accepts generator argument again (Issue #1319). + +- ``celery.migrate`` command now consumes from all queues if no queues + specified. + + Fix contributed by John Watson. + +.. _version-3.0.19: + +3.0.19 +====== +:release-date: 2013-04-17 04:30:00 P.M BST +:release-by: Ask Solem + +- Now depends on :mod:`billiard` 2.7.3.28 + +- A Python 3 related fix managed to disable the deadlock fix + announced in 3.0.18. + + Tests have been added to make sure this does not happen again. + +- Task retry policy: Default max_retries is now 3. + + This ensures clients will not be hanging while the broker is down. + + .. note:: + + You can set a longer retry for the worker by + using the :signal:`celeryd_after_setup` signal: + + .. code-block:: python + + from celery.signals import celeryd_after_setup + + @celeryd_after_setup.connect + def configure_worker(instance, conf, **kwargs): + conf.CELERY_TASK_PUBLISH_RETRY_POLICY = { + 'max_retries': 100, + 'interval_start': 0, + 'interval_max': 1, + 'interval_step': 0.2, + } + +- Worker: Will now properly display message body in error messages + even if the body is a buffer instance. + +- 3.0.18 broke the MongoDB result backend (Issue #1303). + +.. _version-3.0.18: + +3.0.18 +====== +:release-date: 2013-04-12 05:00:00 P.M BST +:release-by: Ask Solem + +- Now depends on :mod:`kombu` 2.5.10. + + See the :ref:`kombu changelog `. + +- Now depends on :mod:`billiard` 2.7.3.27. + +- Can now specify a whitelist of accepted serializers using + the new :setting:`CELERY_ACCEPT_CONTENT` setting. + + This means that you can force the worker to discard messages + serialized with pickle and other untrusted serializers. + For example to only allow JSON serialized messages use:: + + CELERY_ACCEPT_CONTENT = ['json'] + + you can also specify MIME types in the whitelist:: + + CELERY_ACCEPT_CONTENT = ['application/json'] + +- Fixed deadlock in multiprocessing's pool caused by the + semaphore not being released when terminated by signal. + +- Processes Pool: It's now possible to debug pool processes using GDB. + +- ``celery report`` now censors possibly secret settings, like passwords + and secret tokens. + + You should still check the output before pasting anything + on the internet. + +- Connection URLs now ignore multiple '+' tokens. + +- Worker/statedb: Now uses pickle protocol 2 (Py2.5+) + +- Fixed Python 3 compatibility issues. + +- Worker: A warning is now given if a worker is started with the + same node name as an existing worker. + +- Worker: Fixed a deadlock that could occur while revoking tasks (Issue #1297). + +- Worker: The :sig:`HUP` handler now closes all open file descriptors + before restarting to ensure file descriptors does not leak (Issue #1270). + +- Worker: Optimized storing/loading the revoked tasks list (Issue #1289). 
+ + After this change the ``--statedb`` file will take up more disk space, + but loading from and storing the revoked tasks will be considerably + faster (what before took 5 minutes will now take less than a second). + +- Celery will now suggest alternatives if there's a typo in the + broker transport name (e.g. ``ampq`` -> ``amqp``). + +- Worker: The auto-reloader would cause a crash if a monitored file + was unlinked. + + Fix contributed by Agris Ameriks. + +- Fixed AsyncResult pickling error. + + Fix contributed by Thomas Minor. + +- Fixed handling of Unicode in logging output when using log colors + (Issue #427). + +- :class:`~celery.app.utils.ConfigurationView` is now a ``MutableMapping``. + + Contributed by Aaron Harnly. + +- Fixed memory leak in LRU cache implementation. + + Fix contributed by Romuald Brunet. + +- ``celery.contrib.rdb``: Now works when sockets are in non-blocking mode. + + Fix contributed by Theo Spears. + +- The `inspect reserved` remote control command included active (started) tasks + with the reserved tasks (Issue #1030). + +- The :signal:`task_failure` signal received a modified traceback object + meant for pickling purposes, this has been fixed so that it now + receives the real traceback instead. + +- The ``@task`` decorator silently ignored positional arguments, + it now raises the expected :exc:`TypeError` instead (Issue #1125). + +- The worker will now properly handle messages with invalid + eta/expires fields (Issue #1232). + +- The ``pool_restart`` remote control command now reports + an error if the :setting:`CELERYD_POOL_RESTARTS` setting is not set. + +- ``celery.conf.add_defaults`` can now be used with non-dict objects. + +- Fixed compatibility problems in the Proxy class (Issue #1087). + + The class attributes ``__module__``, ``__name__`` and ``__doc__`` + are now meaningful string objects. + + Thanks to Marius Gedminas. + +- MongoDB Backend: The :setting:`MONGODB_BACKEND_SETTINGS` setting + now accepts a ``option`` key that lets you forward arbitrary kwargs + to the underlying ``pymongo.Connection`` object (Issue #1015). + +- Beat: The daily backend cleanup task is no longer enabled + for result backends that support automatic result expiration (Issue #1031). + +- Canvas list operations now takes application instance from the first + task in the list, instead of depending on the ``current_app`` (Issue #1249). + +- Worker: Message decoding error log message now includes traceback + information. + +- Worker: The startup banner now includes system platform. + +- ``celery inspect|status|control`` now gives an error if used + with an SQL based broker transport. + +.. _version-3.0.17: + +3.0.17 +====== +:release-date: 2013-03-22 04:00:00 P.M UTC +:release-by: Ask Solem + +- Now depends on kombu 2.5.8 + +- Now depends on billiard 2.7.3.23 + +- RabbitMQ/Redis: thread-less and lock-free rate-limit implementation. + + This means that rate limits pose minimal overhead when used with + RabbitMQ/Redis or future transports using the eventloop, + and that the rate-limit implementation is now thread-less and lock-free. + + The thread-based transports will still use the old implementation for + now, but the plan is to use the timer also for other + broker transports in Celery 3.1. + +- Rate limits now works with eventlet/gevent if using RabbitMQ/Redis as the + broker. + +- A regression caused ``task.retry`` to ignore additional keyword arguments. + + Extra keyword arguments are now used as execution options again. + Fix contributed by Simon Engledew. 
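+
+  For example, execution options such as ``countdown`` or ``queue`` are
+  forwarded again (a minimal sketch; ``download`` is a hypothetical helper):
+
+  .. code-block:: python
+
+      from celery import Celery
+
+      app = Celery('example', broker='amqp://')
+
+      @app.task(bind=True, max_retries=5)
+      def fetch(self, url):
+          try:
+              return download(url)  # hypothetical helper
+          except IOError as exc:
+              # Extra keyword arguments are used as execution
+              # options for the retried task again.
+              raise self.retry(exc=exc, countdown=60, queue='retries')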
+ +- Windows: Fixed problem with the worker trying to pickle the Django settings + module at worker startup. + +- generic-init.d: No longer double quotes ``$CELERYD_CHDIR`` (Issue #1235). + +- generic-init.d: Removes bash-specific syntax. + + Fix contributed by Pär Wieslander. + +- Cassandra Result Backend: Now handles the + :exc:`~pycassa.AllServersUnavailable` error (Issue #1010). + + Fix contributed by Jared Biel. + +- Result: Now properly forwards apps to GroupResults when deserializing + (Issue #1249). + + Fix contributed by Charles-Axel Dein. + +- ``GroupResult.revoke`` now supports the ``terminate`` and ``signal`` + keyword arguments. + +- Worker: Multiprocessing pool workers now import task modules/configuration + before setting up the logging system so that logging signals can be + connected before they're dispatched. + +- chord: The ``AsyncResult`` instance returned now has its ``parent`` + attribute set to the header ``GroupResult``. + + This is consistent with how ``chain`` works. + +.. _version-3.0.16: + +3.0.16 +====== +:release-date: 2013-03-07 04:00:00 P.M UTC +:release-by: Ask Solem + +- Happy International Women's Day! + + We have a long way to go, so this is a chance for you to get involved in one + of the organizations working for making our communities more + diverse. + + - PyLadies — http://pyladies.com + - Girls Who Code — http://www.girlswhocode.com + - Women Who Code — http://www.meetup.com/Women-Who-Code-SF/ + +- Now depends on :mod:`kombu` version 2.5.7 + +- Now depends on :mod:`billiard` version 2.7.3.22 + +- AMQP heartbeats are now disabled by default. + + Some users experiences issues with heartbeats enabled, + and it's not strictly necessary to use them. + + If you're experiencing problems detecting connection failures, + you can re-enable heartbeats by configuring the :setting:`BROKER_HEARTBEAT` + setting. + +- Worker: Now propagates connection errors occurring in multiprocessing + callbacks, so that the connection can be reset (Issue #1226). + +- Worker: Now propagates connection errors occurring in timer callbacks, + so that the connection can be reset. + +- The modules in :setting:`CELERY_IMPORTS` and :setting:`CELERY_INCLUDE` + are now imported in the original order (Issue #1161). + + The modules in :setting:`CELERY_IMPORTS` will be imported first, + then continued by :setting:`CELERY_INCLUDE`. + + Thanks to Joey Wilhelm. + +- New bash completion for ``celery`` available in the git repository: + + https://github.com/celery/celery/tree/3.0/extra/bash-completion + + You can source this file or put it in ``bash_completion.d`` to + get auto-completion for the ``celery`` command-line utility. + +- The node name of a worker can now include unicode characters (Issue #1186). + +- The repr of a ``crontab`` object now displays correctly (Issue #972). + +- ``events.State`` no longer modifies the original event dictionary. + +- No longer uses ``Logger.warn`` deprecated in Python 3. + +- Cache Backend: Now works with chords again (Issue #1094). + +- Chord unlock now handles errors occurring while calling the callback. + +- Generic worker init.d script: Status check is now performed by + querying the pid of the instance instead of sending messages. + + Contributed by Milen Pavlov. + +- Improved init scripts for CentOS. + + - Updated to support celery 3.x conventions. + - Now uses CentOS built-in ``status`` and ``killproc`` + - Support for multi-node / multi-pid worker services. + - Standard color-coded CentOS service-init output. + - A test suite. 
+ + Contributed by Milen Pavlov. + +- ``ResultSet.join`` now always works with empty result set (Issue #1219). + +- A ``group`` consisting of a single task is now supported (Issue #1219). + +- Now supports the ``pycallgraph`` program (Issue #1051). + +- Fixed Jython compatibility problems. + +- Django tutorial: Now mentions that the example app must be added to + ``INSTALLED_APPS`` (Issue #1192). + +.. _version-3.0.15: + +3.0.15 +====== +:release-date: 2013-02-11 04:30:00 P.M UTC +:release-by: Ask Solem + +- Now depends on billiard 2.7.3.21 which fixed a syntax error crash. + +- Fixed bug with :setting:`CELERY_SEND_TASK_SENT_EVENT`. + +.. _version-3.0.14: + +3.0.14 +====== +:release-date: 2013-02-08 05:00:00 P.M UTC +:release-by: Ask Solem + +- Now depends on Kombu 2.5.6 + +- Now depends on billiard 2.7.3.20 + +- ``execv`` is now disabled by default. + + It was causing too many problems for users, you can still enable + it using the :setting:`CELERYD_FORCE_EXECV` setting. + + execv was only enabled when transports other than amqp/redis was used, + and it's there to prevent deadlocks caused by mutexes not being released + before the process forks. Unfortunately it also changes the environment + introducing many corner case bugs that is hard to fix without adding + horrible hacks. Deadlock issues are reported far less often than the + bugs that execv are causing, so we now disable it by default. + + Work is in motion to create non-blocking versions of these transports + so that execv is not necessary (which is the situation with the amqp + and redis broker transports) + +- Chord exception behavior defined (Issue #1172). + + From Celery 3.1 the chord callback will change state to FAILURE + when a task part of a chord raises an exception. + + It was never documented what happens in this case, + and the actual behavior was very unsatisfactory, indeed + it will just forward the exception value to the chord callback. + + For backward compatibility reasons we do not change to the new + behavior in a bugfix release, even if the current behavior was + never documented. Instead you can enable the + :setting:`CELERY_CHORD_PROPAGATES` setting to get the new behavior + that will be default from Celery 3.1. + + See more at :ref:`chord-errors`. + +- worker: Fixes bug with ignored and retried tasks. + + The ``on_chord_part_return`` and ``Task.after_return`` callbacks, + nor the ``task_postrun`` signal should be called when the task was + retried/ignored. + + Fix contributed by Vlad. + +- ``GroupResult.join_native`` now respects the ``propagate`` argument. + +- ``subtask.id`` added as an alias to ``subtask['options'].id`` + + .. code-block:: python + + >>> s = add.s(2, 2) + >>> s.id = 'my-id' + >>> s['options'] + {'task_id': 'my-id'} + + >>> s.id + 'my-id' + +- worker: Fixed error `Could not start worker processes` occurring + when restarting after connection failure (Issue #1118). + +- Adds new signal :signal:`task-retried` (Issue #1169). + +- `celery events --dumper` now handles connection loss. + +- Will now retry sending the task-sent event in case of connection failure. + +- amqp backend: Now uses ``Message.requeue`` instead of republishing + the message after poll. + +- New :setting:`BROKER_HEARTBEAT_CHECKRATE` setting introduced to modify the + rate at which broker connection heartbeats are monitored. + + The default value was also changed from 3.0 to 2.0. + +- :class:`celery.events.state.State` is now pickleable. + + Fix contributed by Mher Movsisyan. 
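+
+  A small sketch of what now works:
+
+  .. code-block:: python
+
+      import pickle
+
+      from celery.events.state import State
+
+      state = State()
+      data = pickle.dumps(state)      # previously failed, now works
+      state2 = pickle.loads(data)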
+ +- :class:`celery.datastructures.LRUCache` is now pickleable. + + Fix contributed by Mher Movsisyan. + +- The stats broadcast command now includes the workers pid. + + Contributed by Mher Movsisyan. + +- New ``conf`` remote control command to get a workers current configuration. + + Contributed by Mher Movsisyan. + +- Adds the ability to modify the chord unlock task's countdown + argument (Issue #1146). + + Contributed by Jun Sakai + +- beat: The scheduler now uses the `now()`` method of the schedule, + so that schedules can provide a custom way to get the current date and time. + + Contributed by Raphaël Slinckx + +- Fixed pickling of configuration modules on Windows or when execv is used + (Issue #1126). + +- Multiprocessing logger is now configured with loglevel ``ERROR`` + by default. + + Since 3.0 the multiprocessing loggers were disabled by default + (only configured when the :envvar:`MP_LOG` environment variable was set). + +.. _version-3.0.13: + +3.0.13 +====== +:release-date: 2013-01-07 04:00:00 P.M UTC +:release-by: Ask Solem + +- Now depends on Kombu 2.5 + + - py-amqp has replaced amqplib as the default transport, + gaining support for AMQP 0.9, and the RabbitMQ extensions + including Consumer Cancel Notifications and heartbeats. + + - support for multiple connection URLs for failover. + + - Read more in the :ref:`Kombu 2.5 changelog `. + +- Now depends on billiard 2.7.3.19 + +- Fixed a deadlock issue that could occur when the producer pool + inherited the connection pool instance of the parent process. + +- The :option:`--loader` option now works again (Issue #1066). + +- :program:`celery` umbrella command: All subcommands now supports + the :option:`--workdir` option (Issue #1063). + +- Groups included in chains now give GroupResults (Issue #1057) + + Previously it would incorrectly add a regular result instead of a group + result, but now this works: + + .. code-block:: python + + # [4 + 4, 4 + 8, 16 + 8] + >>> res = (add.s(2, 2) | group(add.s(4), add.s(8), add.s(16)))() + >>> res + + +- Chains can now chain other chains and use partial arguments (Issue #1057). + + Example: + + .. code-block:: python + + >>> c1 = (add.s(2) | add.s(4)) + >>> c2 = (add.s(8) | add.s(16)) + + >>> c3 = (c1 | c2) + + # 8 + 2 + 4 + 8 + 16 + >>> assert c3(8).get() == 38 + +- Subtasks can now be used with unregistered tasks. + + You can specify subtasks even if you just have the name:: + + >>> s = subtask(task_name, args=(), kwargs=()) + >>> s.delay() + +- The :program:`celery shell` command now always adds the current + directory to the module path. + +- The worker will now properly handle the :exc:`pytz.AmbiguousTimeError` + exception raised when an ETA/countdown is prepared while being in DST + transition (Issue #1061). + +- force_execv: Now makes sure that task symbols in the original + task modules will always use the correct app instance (Issue #1072). + +- AMQP Backend: Now republishes result messages that have been polled + (using ``result.ready()`` and friends, ``result.get()`` will not do this + in this version). + +- Crontab schedule values can now "wrap around" + + This means that values like ``11-1`` translates to ``[11, 12, 1]``. + + Contributed by Loren Abrams. + +- multi stopwait command now shows the pid of processes. + + Contributed by Loren Abrams. + +- Handling of ETA/countdown fixed when the :setting:`CELERY_ENABLE_UTC` + setting is disabled (Issue #1065). 
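+
+To illustrate the crontab "wrap around" support mentioned above, a schedule
+spanning midnight can now be written like this (the entry and task names are
+placeholders):
+
+.. code-block:: python
+
+    from celery.schedules import crontab
+
+    CELERYBEAT_SCHEDULE = {
+        'nightly-cleanup': {
+            'task': 'tasks.cleanup',
+            # hour='22-2' now expands to [22, 23, 0, 1, 2].
+            'schedule': crontab(minute=0, hour='22-2'),
+        },
+    }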
+ +- A number of uneeded properties were included in messages, + caused by accidentally passing ``Queue.as_dict`` as message properties. + +- Rate limit values can now be float + + This also extends the string format so that values like ``"0.5/s"`` works. + + Contributed by Christoph Krybus + +- Fixed a typo in the broadcast routing documentation (Issue #1026). + +- Rewrote confusing section about idempotence in the task user guide. + +- Fixed typo in the daemonization tutorial (Issue #1055). + +- Fixed several typos in the documentation. + + Contributed by Marius Gedminas. + +- Batches: Now works when using the eventlet pool. + + Fix contributed by Thomas Grainger. + +- Batches: Added example sending results to :mod:`celery.contrib.batches`. + + Contributed by Thomas Grainger. + +- Mongodb backend: Connection ``max_pool_size`` can now be set in + :setting:`CELERY_MONGODB_BACKEND_SETTINGS`. + + Contributed by Craig Younkins. + +- Fixed problem when using earlier versions of :mod:`pytz`. + + Fix contributed by Vlad. + +- Docs updated to include the default value for the + :setting:`CELERY_TASK_RESULT_EXPIRES` setting. + +- Improvements to the django-celery tutorial. + + Contributed by Locker537. + +- The ``add_consumer`` control command did not properly persist + the addition of new queues so that they survived connection failure + (Issue #1079). + + +3.0.12 +====== +:release-date: 2012-11-06 02:00 P.M UTC +:release-by: Ask Solem + +- Now depends on kombu 2.4.8 + + - [Redis] New and improved fair queue cycle algorithm (Kevin McCarthy). + - [Redis] Now uses a Redis-based mutex when restoring messages. + - [Redis] Number of messages that can be restored in one interval is no + longer limited (but can be set using the + ``unacked_restore_limit`` + :setting:`transport option `.) + - Heartbeat value can be specified in broker URLs (Mher Movsisyan). + - Fixed problem with msgpack on Python 3 (Jasper Bryant-Greene). + +- Now depends on billiard 2.7.3.18 + +- Celery can now be used with static analysis tools like PyDev/PyCharm/pylint + etc. + +- Development documentation has moved to Read The Docs. + + The new URL is: http://docs.celeryproject.org/en/master + +- New :setting:`CELERY_QUEUE_HA_POLICY` setting used to set the default + HA policy for queues when using RabbitMQ. + +- New method ``Task.subtask_from_request`` returns a subtask using the current + request. + +- Results get_many method did not respect timeout argument. + + Fix contributed by Remigiusz Modrzejewski + +- generic_init.d scripts now support setting :envvar:`CELERY_CREATE_DIRS` to + always create log and pid directories (Issue #1045). + + This can be set in your :file:`/etc/default/celeryd`. + +- Fixed strange kombu import problem on Python 3.2 (Issue #1034). + +- Worker: ETA scheduler now uses millisecond precision (Issue #1040). + +- The ``--config`` argument to programs is now supported by all loaders. + +- The :setting:`CASSANDRA_OPTIONS` setting has now been documented. + + Contributed by Jared Biel. + +- Task methods (:mod:`celery.contrib.methods`) cannot be used with the old + task base class, the task decorator in that module now inherits from the new. + +- An optimization was too eager and caused some logging messages to never emit. + +- :mod:`celery.contrib.batches` now works again. + +- Fixed missing whitespace in ``bdist_rpm`` requirements (Issue #1046). + +- Event state's ``tasks_by_name`` applied limit before filtering by name. + + Fix contributed by Alexander A. Sosnovskiy. + +.. 
_version-3.0.11: + +3.0.11 +====== +:release-date: 2012-09-26 04:00 P.M UTC +:release-by: Ask Solem + +- [security:low] generic-init.d scripts changed permissions of /var/log & /var/run + + In the daemonization tutorial the recommended directories were as follows: + + .. code-block:: bash + + CELERYD_LOG_FILE="/var/log/celery/%n.log" + CELERYD_PID_FILE="/var/run/celery/%n.pid" + + But in the scripts themselves the default files were ``/var/log/celery%n.log`` + and ``/var/run/celery%n.pid``, so if the user did not change the location + by configuration, the directories ``/var/log`` and ``/var/run`` would be + created - and worse have their permissions and owners changed. + + This change means that: + + - Default pid file is ``/var/run/celery/%n.pid`` + - Default log file is ``/var/log/celery/%n.log`` + + - The directories are only created and have their permissions + changed if *no custom locations are set*. + + Users can force paths to be created by calling the ``create-paths`` + subcommand: + + .. code-block:: bash + + $ sudo /etc/init.d/celeryd create-paths + + .. admonition:: Upgrading Celery will not update init scripts + + To update the init scripts you have to re-download + the files from source control and update them manually. + You can find the init scripts for version 3.0.x at: + + http://github.com/celery/celery/tree/3.0/extra/generic-init.d + +- Now depends on billiard 2.7.3.17 + +- Fixes request stack protection when app is initialized more than + once (Issue #1003). + +- ETA tasks now properly works when system timezone is not the same + as the configured timezone (Issue #1004). + +- Terminating a task now works if the task has been sent to the + pool but not yet acknowledged by a pool process (Issue #1007). + + Fix contributed by Alexey Zatelepin + +- Terminating a task now properly updates the state of the task to revoked, + and sends a ``task-revoked`` event. + +- Generic worker init script now waits for workers to shutdown by default. + +- Multi: No longer parses --app option (Issue #1008). + +- Multi: stop_verify command renamed to stopwait. + +- Daemonization: Now delays trying to create pidfile/logfile until after + the working directory has been changed into. + +- :program:`celery worker` and :program:`celery beat` commands now respects + the :option:`--no-color` option (Issue #999). + +- Fixed typos in eventlet examples (Issue #1000) + + Fix contributed by Bryan Bishop. + Congratulations on opening bug #1000! + +- Tasks that raise :exc:`~celery.exceptions.Ignore` are now acknowledged. + +- Beat: Now shows the name of the entry in ``sending due task`` logs. + +.. _version-3.0.10: + +3.0.10 +====== +:release-date: 2012-09-20 05:30 P.M BST +:release-by: Ask Solem + +- Now depends on kombu 2.4.7 + +- Now depends on billiard 2.7.3.14 + + - Fixes crash at startup when using Django and pre-1.4 projects + (setup_environ). + + - Hard time limits now sends the KILL signal shortly after TERM, + to terminate processes that have signal handlers blocked by C extensions. + + - Billiard now installs even if the C extension cannot be built. + + It's still recommended to build the C extension if you are using + a transport other than rabbitmq/redis (or use forced execv for some + other reason). + + - Pool now sets a ``current_process().index`` attribute that can be used to create + as many log files as there are processes in the pool. 
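+
+    A hedged sketch of how this could be used to give every pool process its
+    own log file (the path, logger and handler names are placeholders):
+
+    .. code-block:: python
+
+        import logging
+
+        from billiard import current_process
+        from celery.signals import worker_process_init
+
+        @worker_process_init.connect
+        def setup_process_log(**kwargs):
+            # ``index`` is 0, 1, 2, ... for each process in the pool.
+            index = current_process().index
+            handler = logging.FileHandler(
+                '/var/log/celery/pool-%d.log' % index)
+            logging.getLogger('myapp.tasks').addHandler(handler)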
+ +- Canvas: chord/group/chain no longer modifies the state when called + + Previously calling a chord/group/chain would modify the ids of subtasks + so that: + + .. code-block:: python + + >>> c = chord([add.s(2, 2), add.s(4, 4)], xsum.s()) + >>> c() + >>> c() <-- call again + + at the second time the ids for the tasks would be the same as in the + previous invocation. This is now fixed, so that calling a subtask + won't mutate any options. + +- Canvas: Chaining a chord to another task now works (Issue #965). + +- Worker: Fixed a bug where the request stack could be corrupted if + relative imports are used. + + Problem usually manifested itself as an exception while trying to + send a failed task result (``NoneType does not have id attribute``). + + Fix contributed by Sam Cooke. + +- Tasks can now raise :exc:`~celery.exceptions.Ignore` to skip updating states + or events after return. + + Example: + + .. code-block:: python + + from celery.exceptions import Ignore + + @task + def custom_revokes(): + if redis.sismember('tasks.revoked', custom_revokes.request.id): + raise Ignore() + +- The worker now makes sure the request/task stacks are not modified + by the initial ``Task.__call__``. + + This would previously be a problem if a custom task class defined + ``__call__`` and also called ``super()``. + +- Because of problems the fast local optimization has been disabled, + and can only be enabled by setting the :envvar:`USE_FAST_LOCALS` attribute. + +- Worker: Now sets a default socket timeout of 5 seconds at shutdown + so that broken socket reads do not hinder proper shutdown (Issue #975). + +- More fixes related to late eventlet/gevent patching. + +- Documentation for settings out of sync with reality: + + - :setting:`CELERY_TASK_PUBLISH_RETRY` + + Documented as disabled by default, but it was enabled by default + since 2.5 as stated by the 2.5 changelog. + + - :setting:`CELERY_TASK_PUBLISH_RETRY_POLICY` + + The default max_retries had been set to 100, but documented as being + 3, and the interval_max was set to 1 but documented as 0.2. + The default setting are now set to 3 and 0.2 as it was originally + documented. + + Fix contributed by Matt Long. + +- Worker: Log messages when connection established and lost have been improved. + +- The repr of a crontab schedule value of '0' should be '*' (Issue #972). + +- Revoked tasks are now removed from reserved/active state in the worker + (Issue #969) + + Fix contributed by Alexey Zatelepin. + +- gevent: Now supports hard time limits using ``gevent.Timeout``. + +- Documentation: Links to init scripts now point to the 3.0 branch instead + of the development branch (master). + +- Documentation: Fixed typo in signals user guide (Issue #986). + + ``instance.app.queues`` -> ``instance.app.amqp.queues``. + +- Eventlet/gevent: The worker did not properly set the custom app + for new greenlets. + +- Eventlet/gevent: Fixed a bug where the worker could not recover + from connection loss (Issue #959). + + Also, because of a suspected bug in gevent the + :setting:`BROKER_CONNECTION_TIMEOUT` setting has been disabled + when using gevent + +3.0.9 +===== +:release-date: 2012-08-31 06:00 P.M BST +:release-by: Ask Solem + +- Important note for users of Django and the database scheduler! + + Recently a timezone issue has been fixed for periodic tasks, + but erroneous timezones could have already been stored in the + database, so for the fix to work you need to reset + the ``last_run_at`` fields. + + You can do this by executing the following command: + + .. 
code-block:: bash + + $ python manage.py shell + >>> from djcelery.models import PeriodicTask + >>> PeriodicTask.objects.update(last_run_at=None) + + You also have to do this if you change the timezone or + :setting:`CELERY_ENABLE_UTC` setting. + +- Note about the :setting:`CELERY_ENABLE_UTC` setting. + + If you previously disabled this just to force periodic tasks to work with + your timezone, then you are now *encouraged to re-enable it*. + +- Now depends on Kombu 2.4.5 which fixes PyPy + Jython installation. + +- Fixed bug with timezones when :setting:`CELERY_ENABLE_UTC` is disabled + (Issue #952). + +- Fixed a typo in the celerybeat upgrade mechanism (Issue #951). + +- Make sure the `exc_info` argument to logging is resolved (Issue #899). + +- Fixed problem with Python 3.2 and thread join timeout overflow (Issue #796). + +- A test case was occasionally broken for Python 2.5. + +- Unit test suite now passes for PyPy 1.9. + +- App instances now supports the with statement. + + This calls the new :meth:`~celery.Celery.close` method at exit, which + cleans up after the app like closing pool connections. + + Note that this is only necessary when dynamically creating apps, + e.g. for "temporary" apps. + +- Support for piping a subtask to a chain. + + For example: + + .. code-block:: python + + pipe = sometask.s() | othertask.s() + new_pipe = mytask.s() | pipe + + Contributed by Steve Morin. + +- Fixed problem with group results on non-pickle serializers. + + Fix contributed by Steeve Morin. + +.. _version-3.0.8: + +3.0.8 +===== +:release-date: 2012-08-29 05:00 P.M BST +:release-by: Ask Solem + +- Now depends on Kombu 2.4.4 + +- Fixed problem with amqplib and receiving larger message payloads + (Issue #922). + + The problem would manifest itself as either the worker hanging, + or occasionally a ``Framing error`` exception appearing. + + Users of the new ``pyamqp://`` transport must upgrade to + :mod:`amqp` 0.9.3. + +- Beat: Fixed another timezone bug with interval and crontab schedules + (Issue #943). + +- Beat: The schedule file is now automatically cleared if the timezone + is changed. + + The schedule is also cleared when you upgrade to 3.0.8 from an earlier + version, this to register the initial timezone info. + +- Events: The :event:`worker-heartbeat` event now include processed and active + count fields. + + Contributed by Mher Movsisyan. + +- Fixed error with error email and new task classes (Issue #931). + +- ``BaseTask.__call__`` is no longer optimized away if it has been monkey + patched. + +- Fixed shutdown issue when using gevent (Issue #911 & Issue #936). + + Fix contributed by Thomas Meson. + +.. _version-3.0.7: + +3.0.7 +===== +:release-date: 2012-08-24 05:00 P.M BST +:release-by: Ask Solem + +- Fixes several problems with periodic tasks and timezones (Issue #937). + +- Now depends on kombu 2.4.2 + + - Redis: Fixes a race condition crash + + - Fixes an infinite loop that could happen when retrying establishing + the broker connection. + +- Daemons now redirect standard file descriptors to :file:`/dev/null` + + Though by default the standard outs are also redirected + to the logger instead, but you can disable this by changing + the :setting:`CELERY_REDIRECT_STDOUTS` setting. + +- Fixes possible problems when eventlet/gevent is patched too late. + +- ``LoggingProxy`` no longer defines ``fileno()`` (Issue #928). + +- Results are now ignored for the chord unlock task. + + Fix contributed by Steeve Morin. + +- Cassandra backend now works if result expiry is disabled. 
+ + Fix contributed by Steeve Morin. + +- The traceback object is now passed to signal handlers instead + of the string representation. + + Fix contributed by Adam DePue. + +- Celery command: Extensions are now sorted by name. + +- A regression caused the :event:`task-failed` event to be sent + with the exception object instead of its string representation. + +- The worker daemon would try to create the pid file before daemonizing + to catch errors, but this file was not immediately released (Issue #923). + +- Fixes Jython compatibility. + +- ``billiard.forking_enable`` was called by all pools not just the + processes pool, which would result in a useless warning if the billiard + C extensions were not installed. + +.. _version-3.0.6: + +3.0.6 +===== +:release-date: 2012-08-17 11:00 P.M BST +:release-by: Ask Solem + +- Now depends on kombu 2.4.0 + +- Now depends on billiard 2.7.3.12 + +- Redis: Celery now tries to restore messages whenever there are no messages + in the queue. + +- Crontab schedules now properly respects :setting:`CELERY_TIMEZONE` setting. + + It's important to note that crontab schedules uses UTC time by default + unless this setting is set. + + Issue #904 and django-celery #150. + +- ``billiard.enable_forking`` is now only set by the processes pool. + +- The transport is now properly shown by :program:`celery report` + (Issue #913). + +- The `--app` argument now works if the last part is a module name + (Issue #921). + +- Fixed problem with unpickleable exceptions (billiard #12). + +- Adds ``task_name`` attribute to ``EagerResult`` which is always + :const:`None` (Issue #907). + +- Old Task class in :mod:`celery.task` no longer accepts magic kwargs by + default (Issue #918). + + A regression long ago disabled magic kwargs for these, and since + no one has complained about it we don't have any incentive to fix it now. + +- The ``inspect reserved`` control command did not work properly. + +- Should now play better with static analyzation tools by explicitly + specifying dynamically created attributes in the :mod:`celery` and + :mod:`celery.task` modules. + +- Terminating a task now results in + :exc:`~celery.exceptions.RevokedTaskError` instead of a ``WorkerLostError``. + +- ``AsyncResult.revoke`` now accepts ``terminate`` and ``signal`` arguments. + +- The :event:`task-revoked` event now includes new fields: ``terminated``, + ``signum``, and ``expired``. + +- The argument to :class:`~celery.exceptions.TaskRevokedError` is now one + of the reasons ``revoked``, ``expired`` or ``terminated``. + +- Old Task class does no longer use classmethods for push_request and + pop_request (Issue #912). + +- ``GroupResult`` now supports the ``children`` attribute (Issue #916). + +- ``AsyncResult.collect`` now respects the ``intermediate`` argument + (Issue #917). + +- Fixes example task in documentation (Issue #902). + +- Eventlet fixed so that the environment is patched as soon as possible. + +- eventlet: Now warns if celery related modules that depends on threads + are imported before eventlet is patched. + +- Improved event and camera examples in the monitoring guide. + +- Disables celery command setuptools entrypoints if the command can't be + loaded. + +- Fixed broken ``dump_request`` example in the tasks guide. + + + +.. 
_version-3.0.5: + +3.0.5 +===== +:release-date: 2012-08-01 04:00 P.M BST +:release-by: Ask Solem + +- Now depends on kombu 2.3.1 + billiard 2.7.3.11 + +- Fixed a bug with the -B option (``cannot pickle thread.lock objects``) + (Issue #894 + Issue #892, + django-celery #154). + +- The :control:`restart_pool` control command now requires the + :setting:`CELERYD_POOL_RESTARTS` setting to be enabled + + This change was necessary as the multiprocessing event that the restart + command depends on is responsible for creating many semaphores/file + descriptors, resulting in problems in some environments. + +- ``chain.apply`` now passes args to the first task (Issue #889). + +- Documented previously secret options to the Django-Celery monitor + in the monitoring userguide (Issue #396). + +- Old changelog are now organized in separate documents for each series, + see :ref:`history`. + +.. _version-3.0.4: + +3.0.4 +===== +:release-date: 2012-07-26 07:00 P.M BST +:release-by: Ask Solem + +- Now depends on Kombu 2.3 + +- New experimental standalone Celery monitor: Flower + + See :ref:`monitoring-flower` to read more about it! + + Contributed by Mher Movsisyan. + +- Now supports AMQP heartbeats if using the new ``pyamqp://`` transport. + + - The py-amqp transport requires the :mod:`amqp` library to be installed:: + + $ pip install amqp + + - Then you need to set the transport URL prefix to ``pyamqp://``. + + - The default heartbeat value is 10 seconds, but this can be changed using + the :setting:`BROKER_HEARTBEAT` setting:: + + BROKER_HEARTBEAT = 5.0 + + - If the broker heartbeat is set to 10 seconds, the heartbeats will be + monitored every 5 seconds (double the hertbeat rate). + + See the :ref:`Kombu 2.3 changelog ` for more information. + +- Now supports RabbitMQ Consumer Cancel Notifications, using the ``pyamqp://`` + transport. + + This is essential when running RabbitMQ in a cluster. + + See the :ref:`Kombu 2.3 changelog ` for more information. + +- Delivery info is no longer passed directly through. + + It was discovered that the SQS transport adds objects that can't + be pickled to the delivery info mapping, so we had to go back + to using the whitelist again. + + Fixing this bug also means that the SQS transport is now working again. + +- The semaphore was not properly released when a task was revoked (Issue #877). + + This could lead to tasks being swallowed and not released until a worker + restart. + + Thanks to Hynek Schlawack for debugging the issue. + +- Retrying a task now also forwards any linked tasks. + + This means that if a task is part of a chain (or linked in some other + way) and that even if the task is retried, then the next task in the chain + will be executed when the retry succeeds. + +- Chords: Now supports setting the interval and other keyword arguments + to the chord unlock task. + + - The interval can now be set as part of the chord subtasks kwargs:: + + chord(header)(body, interval=10.0) + + - In addition the chord unlock task now honors the Task.default_retry_delay + option, used when none is specified, which also means that the default + interval can also be changed using annotations: + + .. code-block:: python + + CELERY_ANNOTATIONS = { + 'celery.chord_unlock': { + 'default_retry_delay': 10.0, + } + } + +- New :meth:`@Celery.add_defaults` method can add new default configuration + dicts to the applications configuration. 
+ + For example:: + + config = {'FOO': 10} + + celery.add_defaults(config) + + is the same as ``celery.conf.update(config)`` except that data will not be + copied, and that it will not be pickled when the worker spawns child + processes. + + In addition the method accepts a callable:: + + def initialize_config(): + # insert heavy stuff that can't be done at import time here. + + celery.add_defaults(initialize_config) + + which means the same as the above except that it will not happen + until the celery configuration is actually used. + + As an example, Celery can lazily use the configuration of a Flask app:: + + flask_app = Flask() + celery = Celery() + celery.add_defaults(lambda: flask_app.config) + +- Revoked tasks were not marked as revoked in the result backend (Issue #871). + + Fix contributed by Hynek Schlawack. + +- Eventloop now properly handles the case when the epoll poller object + has been closed (Issue #882). + +- Fixed syntax error in ``funtests/test_leak.py`` + + Fix contributed by Catalin Iacob. + +- group/chunks: Now accepts empty task list (Issue #873). + +- New method names: + + - ``Celery.default_connection()`` ➠ :meth:`~@Celery.connection_or_acquire`. + - ``Celery.default_producer()`` ➠ :meth:`~@Celery.producer_or_acquire`. + + The old names still work for backward compatibility. + + +.. _version-3.0.3: + +3.0.3 +===== +:release-date: 2012-07-20 09:17 P.M BST +:release-by: Ask Solem + +- amqplib passes the channel object as part of the delivery_info + and it's not pickleable, so we now remove it. + +.. _version-3.0.2: + +3.0.2 +===== +:release-date: 2012-07-20 04:00 P.M BST +:release-by: Ask Solem + +- A bug caused the following task options to not take defaults from the + configuration (Issue #867 + Issue #858) + + The following settings were affected: + + - :setting:`CELERY_IGNORE_RESULT` + - :setting:`CELERYD_SEND_TASK_ERROR_EMAILS` + - :setting:`CELERY_TRACK_STARTED` + - :setting:`CElERY_STORE_ERRORS_EVEN_IF_IGNORED` + + Fix contributed by John Watson. + +- Task Request: ``delivery_info`` is now passed through as-is (Issue #807). + +- The eta argument now supports datetime's with a timezone set (Issue #855). + +- The worker's banner displayed the autoscale settings in the wrong order + (Issue #859). + +- Extension commands are now loaded after concurrency is set up + so that they don't interfere with e.g. eventlet patching. + +- Fixed bug in the threaded pool (Issue #863) + +- The task failure handler mixed up the fields in :func:`sys.exc_info`. + + Fix contributed by Rinat Shigapov. + +- Fixed typos and wording in the docs. + + Fix contributed by Paul McMillan + +- New setting: :setting:`CELERY_WORKER_DIRECT` + + If enabled each worker will consume from their own dedicated queue + which can be used to route tasks to specific workers. + +- Fixed several edge case bugs in the add consumer remote control command. + +- :mod:`~celery.contrib.migrate`: Can now filter and move tasks to specific + workers if :setting:`CELERY_WORKER_DIRECT` is enabled. + + Among other improvements, the following functions have been added: + + * ``move_direct(filterfun, **opts)`` + * ``move_direct_by_id(task_id, worker_hostname, **opts)`` + * ``move_direct_by_idmap({task_id: worker_hostname, ...}, **opts)`` + * ``move_direct_by_taskmap({task_name: worker_hostname, ...}, **opts)`` + +- :meth:`~celery.Celery.default_connection` now accepts a pool argument that + if set to false causes a new connection to be created instead of acquiring + one from the pool. 
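+
+As an illustration of the timezone-aware ``eta`` support mentioned above
+(assuming an ``add`` task as in the earlier examples; the timezone is a
+placeholder):
+
+.. code-block:: python
+
+    from datetime import datetime, timedelta
+
+    import pytz
+
+    tz = pytz.timezone('Europe/Oslo')
+
+    # A timezone-aware datetime is now accepted for ``eta``.
+    add.apply_async((2, 2), eta=datetime.now(tz) + timedelta(minutes=30))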
+ +- New signal: :signal:`celeryd_after_setup`. + +- Default loader now keeps lowercase attributes from the configuration module. + +.. _version-3.0.1: + +3.0.1 +===== +:release-date: 2012-07-10 06:00 P.M BST +:release-by: Ask Solem + +- Now depends on kombu 2.2.5 + +- inspect now supports limit argument:: + + myapp.control.inspect(limit=1).ping() + +- Beat: now works with timezone aware datetime's. + +- Task classes inheriting ``from celery import Task`` + mistakingly enabled ``accept_magic_kwargs``. + +- Fixed bug in ``inspect scheduled`` (Issue #829). + +- Beat: Now resets the schedule to upgrade to UTC. + +- The :program:`celery worker` command now works with eventlet/gevent. + + Previously it would not patch the environment early enough. + +- The :program:`celery` command now supports extension commands + using setuptools entry-points. + + Libraries can add additional commands to the :program:`celery` + command by adding an entry-point like:: + + setup( + entry_points=[ + 'celery.commands': [ + 'foo = my.module:Command', + ], + ], + ...) + + The command must then support the interface of + :class:`celery.bin.base.Command`. + +- contrib.migrate: New utilities to move tasks from one queue to another. + + - :func:`~celery.contrib.migrate.move_tasks` + - :func:`~celery.contrib.migrate.move_task_by_id` + +- The :event:`task-sent` event now contains ``exchange`` and ``routing_key`` + fields. + +- Fixes bug with installing on Python 3. + + Fix contributed by Jed Smith. + +.. _version-3.0.0: + +3.0.0 (Chiastic Slide) +====================== +:release-date: 2012-07-07 01:30 P.M BST +:release-by: Ask Solem + +See :ref:`whatsnew-3.0`. diff --git a/docs/history/index.rst b/docs/history/index.rst new file mode 100644 index 0000000..673532d --- /dev/null +++ b/docs/history/index.rst @@ -0,0 +1,23 @@ +.. _history: + +========= + History +========= + +This section contains historical change histories, for the latest +version please visit :ref:`changelog`. + +:Release: |version| +:Date: |today| + +.. 
toctree:: + :maxdepth: 2 + + changelog-3.0 + changelog-2.5 + changelog-2.4 + changelog-2.3 + changelog-2.2 + changelog-2.1 + changelog-2.0 + changelog-1.0 diff --git a/docs/images/celery_128.png b/docs/images/celery_128.png new file mode 100644 index 0000000..6795fc6 Binary files /dev/null and b/docs/images/celery_128.png differ diff --git a/docs/images/celery_512.png b/docs/images/celery_512.png new file mode 100644 index 0000000..e128408 Binary files /dev/null and b/docs/images/celery_512.png differ diff --git a/docs/images/celeryevshotsm.jpg b/docs/images/celeryevshotsm.jpg new file mode 100644 index 0000000..e49927e Binary files /dev/null and b/docs/images/celeryevshotsm.jpg differ diff --git a/docs/images/dashboard.png b/docs/images/dashboard.png new file mode 100644 index 0000000..20a8f73 Binary files /dev/null and b/docs/images/dashboard.png differ diff --git a/docs/images/monitor.png b/docs/images/monitor.png new file mode 100644 index 0000000..47d7e3b Binary files /dev/null and b/docs/images/monitor.png differ diff --git a/docs/images/result_graph.png b/docs/images/result_graph.png new file mode 100644 index 0000000..d073599 Binary files /dev/null and b/docs/images/result_graph.png differ diff --git a/docs/images/worker_graph_full.png b/docs/images/worker_graph_full.png new file mode 100644 index 0000000..867bcfb Binary files /dev/null and b/docs/images/worker_graph_full.png differ diff --git a/docs/includes/installation.txt b/docs/includes/installation.txt new file mode 100644 index 0000000..ffc9ede --- /dev/null +++ b/docs/includes/installation.txt @@ -0,0 +1,148 @@ +.. _celery-installation: + +Installation +============ + +You can install Celery either via the Python Package Index (PyPI) +or from source. + +To install using `pip`,:: + + $ pip install -U Celery + +To install using `easy_install`,:: + + $ easy_install -U Celery + +.. _bundles: + +Bundles +------- + +Celery also defines a group of bundles that can be used +to install Celery and the dependencies for a given feature. + +You can specify these in your requirements or on the ``pip`` comand-line +by using brackets. Multiple bundles can be specified by separating them by +commas. + +.. code-block:: bash + + $ pip install "celery[librabbitmq]" + + $ pip install "celery[librabbitmq,redis,auth,msgpack]" + +The following bundles are available: + +Serializers +~~~~~~~~~~~ + +:celery[auth]: + for using the auth serializer. + +:celery[msgpack]: + for using the msgpack serializer. + +:celery[yaml]: + for using the yaml serializer. + +Concurrency +~~~~~~~~~~~ + +:celery[eventlet]: + for using the eventlet pool. + +:celery[gevent]: + for using the gevent pool. + +:celery[threads]: + for using the thread pool. + +Transports and Backends +~~~~~~~~~~~~~~~~~~~~~~~ + +:celery[librabbitmq]: + for using the librabbitmq C library. + +:celery[redis]: + for using Redis as a message transport or as a result backend. + +:celery[mongodb]: + for using MongoDB as a message transport (*experimental*), + or as a result backend (*supported*). + +:celery[sqs]: + for using Amazon SQS as a message transport (*experimental*). + +:celery[memcache]: + for using memcached as a result backend. + +:celery[cassandra]: + for using Apache Cassandra as a result backend. + +:celery[couchdb]: + for using CouchDB as a message transport (*experimental*). + +:celery[couchbase]: + for using CouchBase as a result backend. + +:celery[beanstalk]: + for using Beanstalk as a message transport (*experimental*). 
+ +:celery[zookeeper]: + for using Zookeeper as a message transport. + +:celery[zeromq]: + for using ZeroMQ as a message transport (*experimental*). + +:celery[sqlalchemy]: + for using SQLAlchemy as a message transport (*experimental*), + or as a result backend (*supported*). + +:celery[pyro]: + for using the Pyro4 message transport (*experimental*). + +:celery[slmq]: + for using the SoftLayer Message Queue transport (*experimental*). + +.. _celery-installing-from-source: + +Downloading and installing from source +-------------------------------------- + +Download the latest version of Celery from +http://pypi.python.org/pypi/celery/ + +You can install it by doing the following,:: + + $ tar xvfz celery-0.0.0.tar.gz + $ cd celery-0.0.0 + $ python setup.py build + # python setup.py install + +The last command must be executed as a privileged user if +you are not currently using a virtualenv. + +.. _celery-installing-from-git: + +Using the development version +----------------------------- + +With pip +~~~~~~~~ + +The Celery development version also requires the development +versions of ``kombu``, ``amqp`` and ``billiard``. + +You can install the latest snapshot of these using the following +pip commands:: + + $ pip install https://github.com/celery/celery/zipball/master#egg=celery + $ pip install https://github.com/celery/billiard/zipball/master#egg=billiard + $ pip install https://github.com/celery/py-amqp/zipball/master#egg=amqp + $ pip install https://github.com/celery/kombu/zipball/master#egg=kombu + +With git +~~~~~~~~ + +Please the Contributing section. diff --git a/docs/includes/introduction.txt b/docs/includes/introduction.txt new file mode 100644 index 0000000..340af0b --- /dev/null +++ b/docs/includes/introduction.txt @@ -0,0 +1,200 @@ +:Version: 3.1.13 (Cipater) +:Web: http://celeryproject.org/ +:Download: http://pypi.python.org/pypi/celery/ +:Source: http://github.com/celery/celery/ +:Keywords: task queue, job queue, asynchronous, async, rabbitmq, amqp, redis, + python, webhooks, queue, distributed + +-- + +What is a Task Queue? +===================== + +Task queues are used as a mechanism to distribute work across threads or +machines. + +A task queue's input is a unit of work, called a task, dedicated worker +processes then constantly monitor the queue for new work to perform. + +Celery communicates via messages, usually using a broker +to mediate between clients and workers. To initiate a task a client puts a +message on the queue, the broker then delivers the message to a worker. + +A Celery system can consist of multiple workers and brokers, giving way +to high availability and horizontal scaling. + +Celery is a library written in Python, but the protocol can be implemented in +any language. So far there's RCelery_ for the Ruby programming language, and a +`PHP client`, but language interoperability can also be achieved +by using webhooks. + +.. _RCelery: http://leapfrogdevelopment.github.com/rcelery/ +.. _`PHP client`: https://github.com/gjedeer/celery-php +.. _`using webhooks`: + http://docs.celeryproject.org/en/latest/userguide/remote-tasks.html + +What do I need? +=============== + +Celery version 3.0 runs on, + +- Python (2.5, 2.6, 2.7, 3.2, 3.3) +- PyPy (1.8, 1.9) +- Jython (2.5, 2.7). + +This is the last version to support Python 2.5, +and from Celery 3.1, Python 2.6 or later is required. +The last version to support Python 2.4 was Celery series 2.2. + +*Celery* is usually used with a message broker to send and receive messages. 
+The RabbitMQ, Redis transports are feature complete, +but there's also experimental support for a myriad of other solutions, including +using SQLite for local development. + +*Celery* can run on a single machine, on multiple machines, or even +across datacenters. + +Get Started +=========== + +If this is the first time you're trying to use Celery, or you are +new to Celery 3.0 coming from previous versions then you should read our +getting started tutorials: + +- `First steps with Celery`_ + + Tutorial teaching you the bare minimum needed to get started with Celery. + +- `Next steps`_ + + A more complete overview, showing more features. + +.. _`First steps with Celery`: + http://docs.celeryproject.org/en/latest/getting-started/first-steps-with-celery.html + +.. _`Next steps`: + http://docs.celeryproject.org/en/latest/getting-started/next-steps.html + +Celery is… +========== + +- **Simple** + + Celery is easy to use and maintain, and does *not need configuration files*. + + It has an active, friendly community you can talk to for support, + including a `mailing-list`_ and and an IRC channel. + + Here's one of the simplest applications you can make:: + + from celery import Celery + + app = Celery('hello', broker='amqp://guest@localhost//') + + @app.task + def hello(): + return 'hello world' + +- **Highly Available** + + Workers and clients will automatically retry in the event + of connection loss or failure, and some brokers support + HA in way of *Master/Master* or *Master/Slave* replication. + +- **Fast** + + A single Celery process can process millions of tasks a minute, + with sub-millisecond round-trip latency (using RabbitMQ, + py-librabbitmq, and optimized settings). + +- **Flexible** + + Almost every part of *Celery* can be extended or used on its own, + Custom pool implementations, serializers, compression schemes, logging, + schedulers, consumers, producers, autoscalers, broker transports and much more. + +It supports… +============ + + - **Message Transports** + + - RabbitMQ_, Redis_, + - MongoDB_ (experimental), Amazon SQS (experimental), + - CouchDB_ (experimental), SQLAlchemy_ (experimental), + - Django ORM (experimental), `IronMQ`_ + - and more… + + - **Concurrency** + + - Prefork, Eventlet_, gevent_, threads/single threaded + + - **Result Stores** + + - AMQP, Redis + - memcached, MongoDB + - SQLAlchemy, Django ORM + - Apache Cassandra, IronCache + + - **Serialization** + + - *pickle*, *json*, *yaml*, *msgpack*. + - *zlib*, *bzip2* compression. + - Cryptographic message signing. + +.. _`Eventlet`: http://eventlet.net/ +.. _`gevent`: http://gevent.org/ + +.. _RabbitMQ: http://rabbitmq.com +.. _Redis: http://redis.io +.. _MongoDB: http://mongodb.org +.. _Beanstalk: http://kr.github.com/beanstalkd +.. _CouchDB: http://couchdb.apache.org +.. _SQLAlchemy: http://sqlalchemy.org +.. 
_`IronMQ`: http://iron.io + +Framework Integration +===================== + +Celery is easy to integrate with web frameworks, some of which even have +integration packages: + + +--------------------+------------------------+ + | `Django`_ | not needed | + +--------------------+------------------------+ + | `Pyramid`_ | `pyramid_celery`_ | + +--------------------+------------------------+ + | `Pylons`_ | `celery-pylons`_ | + +--------------------+------------------------+ + | `Flask`_ | not needed | + +--------------------+------------------------+ + | `web2py`_ | `web2py-celery`_ | + +--------------------+------------------------+ + | `Tornado`_ | `tornado-celery`_ | + +--------------------+------------------------+ + +The integration packages are not strictly necessary, but they can make +development easier, and sometimes they add important hooks like closing +database connections at ``fork``. + +.. _`Django`: http://djangoproject.com/ +.. _`Pylons`: http://pylonshq.com/ +.. _`Flask`: http://flask.pocoo.org/ +.. _`web2py`: http://web2py.com/ +.. _`Bottle`: http://bottlepy.org/ +.. _`Pyramid`: http://docs.pylonsproject.org/en/latest/docs/pyramid.html +.. _`pyramid_celery`: http://pypi.python.org/pypi/pyramid_celery/ +.. _`django-celery`: http://pypi.python.org/pypi/django-celery +.. _`celery-pylons`: http://pypi.python.org/pypi/celery-pylons +.. _`web2py-celery`: http://code.google.com/p/web2py-celery/ +.. _`Tornado`: http://www.tornadoweb.org/ +.. _`tornado-celery`: http://github.com/mher/tornado-celery/ + +.. _celery-documentation: + +Documentation +============= + +The `latest documentation`_ with user guides, tutorials and API reference +is hosted at Read The Docs. + +.. _`latest documentation`: http://docs.celeryproject.org/en/latest/ diff --git a/docs/includes/resources.txt b/docs/includes/resources.txt new file mode 100644 index 0000000..e263e2e --- /dev/null +++ b/docs/includes/resources.txt @@ -0,0 +1,66 @@ +.. _getting-help: + +Getting Help +============ + +.. _mailing-list: + +Mailing list +------------ + +For discussions about the usage, development, and future of celery, +please join the `celery-users`_ mailing list. + +.. _`celery-users`: http://groups.google.com/group/celery-users/ + +.. _irc-channel: + +IRC +--- + +Come chat with us on IRC. The **#celery** channel is located at the `Freenode`_ +network. + +.. _`Freenode`: http://freenode.net + +.. _bug-tracker: + +Bug tracker +=========== + +If you have any suggestions, bug reports or annoyances please report them +to our issue tracker at http://github.com/celery/celery/issues/ + +.. _wiki: + +Wiki +==== + +http://wiki.github.com/celery/celery/ + +.. _contributing-short: + +Contributing +============ + +Development of `celery` happens at Github: http://github.com/celery/celery + +You are highly encouraged to participate in the development +of `celery`. If you don't like Github (for some reason) you're welcome +to send regular patches. + +Be sure to also read the `Contributing to Celery`_ section in the +documentation. + +.. _`Contributing to Celery`: + http://docs.celeryproject.org/en/master/contributing.html + +.. _license: + +License +======= + +This software is licensed under the `New BSD License`. See the :file:`LICENSE` +file in the top distribution directory for the full license text. + +.. 
# vim: syntax=rst expandtab tabstop=4 shiftwidth=4 shiftround diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..86e4794 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,67 @@ +================================= + Celery - Distributed Task Queue +================================= + +Celery is a simple, flexible and reliable distributed system to +process vast amounts of messages, while providing operations with +the tools required to maintain such a system. + +It's a task queue with focus on real-time processing, while also +supporting task scheduling. + +Celery has a large and diverse community of users and contributors, +you should come join us :ref:`on IRC ` +or :ref:`our mailing-list `. + +Celery is Open Source and licensed under the `BSD License`_. + +Getting Started +=============== + +- If you are new to Celery you can get started by following + the :ref:`first-steps` tutorial. + +- You can also check out the :ref:`FAQ `. + +.. _`BSD License`: http://www.opensource.org/licenses/BSD-3-Clause + +Contents +======== + +.. toctree:: + :maxdepth: 1 + + copyright + +.. toctree:: + :maxdepth: 2 + + getting-started/index + userguide/index + +.. toctree:: + :maxdepth: 1 + + configuration + django/index + contributing + community + tutorials/index + faq + changelog + whatsnew-3.1 + whatsnew-3.0 + whatsnew-2.5 + reference/index + internals/index + history/index + glossary + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/internals/app-overview.rst b/docs/internals/app-overview.rst new file mode 100644 index 0000000..33dd4e8 --- /dev/null +++ b/docs/internals/app-overview.rst @@ -0,0 +1,254 @@ +============================= + "The Big Instance" Refactor +============================= + +The `app` branch is a work-in-progress to remove +the use of a global configuration in Celery. + +Celery can now be instantiated, which means several +instances of Celery may exist in the same process space. +Also, large parts can be customized without resorting to monkey +patching. + +Examples +======== + +Creating a Celery instance:: + + >>> from celery import Celery + >>> app = Celery() + >>> app.config_from_object("celeryconfig") + >>> #app.config_from_envvar("CELERY_CONFIG_MODULE") + + +Creating tasks: + +.. code-block:: python + + @app.task + def add(x, y): + return x + y + + +Creating custom Task subclasses: + +.. code-block:: python + + Task = celery.create_task_cls() + + class DebugTask(Task): + abstract = True + + def on_failure(self, *args, **kwargs): + import pdb + pdb.set_trace() + + @app.task(base=DebugTask) + def add(x, y): + return x + y + +Starting a worker: + +.. code-block:: python + + worker = celery.Worker(loglevel="INFO") + +Getting access to the configuration: + +.. code-block:: python + + celery.conf.CELERY_ALWAYS_EAGER = True + celery.conf["CELERY_ALWAYS_EAGER"] = True + + +Controlling workers:: + + >>> celery.control.inspect().active() + >>> celery.control.rate_limit(add.name, "100/m") + >>> celery.control.broadcast("shutdown") + >>> celery.control.discard_all() + +Other interesting attributes:: + + # Establish broker connection. + >>> celery.broker_connection() + + # AMQP Specific features. + >>> celery.amqp + >>> celery.amqp.Router + >>> celery.amqp.get_queues() + >>> celery.amqp.get_task_consumer() + + # Loader + >>> celery.loader + + # Default backend + >>> celery.backend + + +As you can probably see, this really opens up another +dimension of customization abilities. 
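Because the app is now an ordinary object, several instances can live side by side in the same process, each with its own configuration. A minimal sketch of that, assuming the constructor and ``conf`` APIs shown above (the broker URLs and task bodies are illustrative):

.. code-block:: python

    from celery import Celery

    # Two independent Celery apps in one process space; each keeps its
    # own configuration and broker settings.
    app_a = Celery('app_a', broker='amqp://guest@localhost//')
    app_b = Celery('app_b', broker='redis://localhost:6379/0')

    # Changing one app's configuration does not leak into the other.
    app_a.conf.CELERY_ALWAYS_EAGER = True

    @app_a.task
    def add(x, y):
        return x + y

    @app_b.task
    def mul(x, y):
        return x * y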
+ +Deprecations +============ + +* celery.task.ping + celery.task.PingTask + + Inferior to the ping remote control command. + Will be removed in Celery 2.3. + +Removed deprecations +==================== + +* `celery.utils.timedelta_seconds` + Use: :func:`celery.utils.timeutils.timedelta_seconds` + +* `celery.utils.defaultdict` + Use: :func:`celery.utils.compat.defaultdict` + +* `celery.utils.all` + Use: :func:`celery.utils.compat.all` + +* `celery.task.apply_async` + Use app.send_task + +* `celery.task.tasks` + Use :data:`celery.registry.tasks` + +Aliases (Pending deprecation) +============================= + +* celery.task.base + * .Task -> {app.create_task_cls} + +* celery.task.sets + * .TaskSet -> {app.TaskSet} + +* celery.decorators / celery.task + * .task -> {app.task} + +* celery.execute + * .apply_async -> {task.apply_async} + * .apply -> {task.apply} + * .send_task -> {app.send_task} + * .delay_task -> no alternative + +* celery.log + * .get_default_logger -> {app.log.get_default_logger} + * .setup_logger -> {app.log.setup_logger} + * .get_task_logger -> {app.log.get_task_logger} + * .setup_task_logger -> {app.log.setup_task_logger} + * .setup_logging_subsystem -> {app.log.setup_logging_subsystem} + * .redirect_stdouts_to_logger -> {app.log.redirect_stdouts_to_logger} + +* celery.messaging + * .establish_connection -> {app.broker_connection} + * .with_connection -> {app.with_connection} + * .get_consumer_set -> {app.amqp.get_task_consumer} + * .TaskPublisher -> {app.amqp.TaskPublisher} + * .TaskConsumer -> {app.amqp.TaskConsumer} + * .ConsumerSet -> {app.amqp.ConsumerSet} + +* celery.conf.* -> {app.conf} + + **NOTE**: All configuration keys are now named the same + as in the configuration. So the key "CELERY_ALWAYS_EAGER" + is accessed as:: + + >>> app.conf.CELERY_ALWAYS_EAGER + + instead of:: + + >>> from celery import conf + >>> conf.ALWAYS_EAGER + + * .get_queues -> {app.amqp.get_queues} + +* celery.task.control + * .broadcast -> {app.control.broadcast} + * .rate_limit -> {app.control.rate_limit} + * .ping -> {app.control.ping} + * .revoke -> {app.control.revoke} + * .discard_all -> {app.control.discard_all} + * .inspect -> {app.control.inspect} + +* celery.utils.info + * .humanize_seconds -> celery.utils.timeutils.humanize_seconds + * .textindent -> celery.utils.textindent + * .get_broker_info -> {app.amqp.get_broker_info} + * .format_broker_info -> {app.amqp.format_broker_info} + * .format_queues -> {app.amqp.format_queues} + +Default App Usage +================= + +To be backward compatible, it must be possible +to use all the classes/functions without passing +an explicit app instance. + +This is achieved by having all app-dependent objects +use :data:`~celery.app.default_app` if the app instance +is missing. + +.. code-block:: python + + from celery.app import app_or_default + + class SomeClass(object): + + def __init__(self, app=None): + self.app = app_or_default(app) + +The problem with this approach is that there is a chance +that the app instance is lost along the way, and everything +seems to be working normally. Testing app instance leaks +is hard. The environment variable :envvar:`CELERY_TRACE_APP` +can be used, when this is enabled :func:`celery.app.app_or_default` +will raise an exception whenever it has to go back to the default app +instance. 
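For completeness, a short usage sketch of the pattern just described, repeating the ``SomeClass`` example from above (the variable names are illustrative):

.. code-block:: python

    from celery import Celery
    from celery.app import app_or_default

    class SomeClass(object):              # same pattern as above
        def __init__(self, app=None):
            self.app = app_or_default(app)

    app = Celery()
    explicit = SomeClass(app=app)         # bound to our instance
    implicit = SomeClass()                # falls back to the default app

    # With the CELERY_TRACE_APP environment variable set, the fallback
    # in the line above raises instead of silently using the default
    # app, which helps expose places where the app instance was lost.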
+ +App Dependency Tree +------------------- + +* {app} + * celery.loaders.base.BaseLoader + * celery.backends.base.BaseBackend + * {app.TaskSet} + * celery.task.sets.TaskSet (app.TaskSet) + * [app.TaskSetResult] + * celery.result.TaskSetResult (app.TaskSetResult) + +* {app.AsyncResult} + * celery.result.BaseAsyncResult / celery.result.AsyncResult + +* celery.bin.worker.WorkerCommand + * celery.apps.worker.Worker + * celery.worker.WorkerController + * celery.worker.consumer.Consumer + * celery.worker.job.TaskRequest + * celery.events.EventDispatcher + * celery.worker.control.ControlDispatch + * celery.woker.control.registry.Panel + * celery.pidbox.BroadcastPublisher + * celery.pidbox.BroadcastConsumer + * celery.worker.controllers.Mediator + * celery.beat.EmbeddedService + +* celery.bin.events.EvCommand + * celery.events.snapshot.evcam + * celery.events.snapshot.Polaroid + * celery.events.EventReceiver + * celery.events.cursesmon.evtop + * celery.events.EventReceiver + * celery.events.cursesmon.CursesMonitor + * celery.events.dumper + * celery.events.EventReceiver + +* celery.bin.amqp.AMQPAdmin + +* celery.bin.beat.BeatCommand + * celery.apps.beat.Beat + * celery.beat.Service + * celery.beat.Scheduler + diff --git a/docs/internals/deprecation.rst b/docs/internals/deprecation.rst new file mode 100644 index 0000000..687c5ed --- /dev/null +++ b/docs/internals/deprecation.rst @@ -0,0 +1,306 @@ +.. _deprecation-timeline: + +============================= + Celery Deprecation Timeline +============================= + +.. contents:: + :local: + +.. _deprecations-v3.2: + +Removals for version 3.2 +======================== + +- Module ``celery.task.trace`` has been renamed to ``celery.app.trace`` + as the ``celery.task`` package is being phased out. The compat module + will be removed in version 3.2 so please change any import from:: + + from celery.task.trace import … + + to:: + + from celery.app.trace import … + +- ``AsyncResult.serializable()`` and ``celery.result.from_serializable`` + will be removed. + + Use instead:: + + >>> tup = result.as_tuple() + >>> from celery.result import result_from_tuple + >>> result = result_from_tuple(tup) + +.. _deprecations-v4.0: + +Removals for version 4.0 +======================== + +Old Task API +------------ + +.. _deprecate-compat-task-modules: + +Compat Task Modules +~~~~~~~~~~~~~~~~~~~ + +- Module ``celery.decorators`` will be removed: + + Which means you need to change:: + + from celery.decorators import task + +Into:: + + from celery import task + +- Module ``celery.task`` *may* be removed (not decided) + + This means you should change:: + + from celery.task import task + + into:: + + from celery import task + + -- and:: + + from celery.task import Task + + into:: + + from celery import Task + + +Note that the new :class:`~celery.Task` class no longer +uses classmethods for these methods: + + - delay + - apply_async + - retry + - apply + - AsyncResult + - subtask + +This also means that you can't call these methods directly +on the class, but have to instantiate the task first:: + + >>> MyTask.delay() # NO LONGER WORKS + + + >>> MyTask().delay() # WORKS! + + +TaskSet +~~~~~~~ + +TaskSet has been renamed to group and TaskSet will be removed in version 4.0. 
+ +Old:: + + >>> from celery.task import TaskSet + + >>> TaskSet(add.subtask((i, i)) for i in xrange(10)).apply_async() + +New:: + + >>> from celery import group + >>> group(add.s(i, i) for i in xrange(10))() + + +Magic keyword arguments +~~~~~~~~~~~~~~~~~~~~~~~ + +The magic keyword arguments accepted by tasks will be removed +in 4.0, so you should start rewriting any tasks +using the ``celery.decorators`` module and depending +on keyword arguments being passed to the task, +for example:: + + from celery.decorators import task + + @task() + def add(x, y, task_id=None): + print("My task id is %r" % (task_id, )) + +should be rewritten into:: + + from celery import task + + @task(bind=True) + def add(self, x, y): + print("My task id is {0.request.id}".format(self)) + + +Task attributes +--------------- + +The task attributes: + +- ``queue`` +- ``exchange`` +- ``exchange_type`` +- ``routing_key`` +- ``delivery_mode`` +- ``priority`` + +is deprecated and must be set by :setting:`CELERY_ROUTES` instead. + +:mod:`celery.result` +-------------------- + +- ``BaseAsyncResult`` -> ``AsyncResult``. + +- ``TaskSetResult`` -> ``GroupResult``. + +- ``TaskSetResult.total`` -> ``len(GroupResult)`` + +- ``TaskSetResult.taskset_id`` -> ``GroupResult.id`` + +Apply to: :class:`~celery.result.AsyncResult`, +:class:`~celery.result.EagerResult`:: + +- ``Result.wait()`` -> ``Result.get()`` + +- ``Result.task_id()`` -> ``Result.id`` + +- ``Result.status`` -> ``Result.state``. + +:mod:`celery.loader` +-------------------- + +- ``current_loader()`` -> ``current_app.loader`` + +- ``load_settings()`` -> ``current_app.conf`` + + +Task_sent signal +---------------- + +The :signal:`task_sent` signal will be removed in version 4.0. +Please use the :signal:`before_task_publish` and :signal:`after_task_publush` +signals instead. + + +Modules to Remove +----------------- + +- ``celery.execute`` + + This module only contains ``send_task``, which must be replaced with + :attr:`@send_task` instead. + +- ``celery.decorators`` + + See :ref:`deprecate-compat-task-modules` + +- ``celery.log`` + + Use :attr:`@log` instead. + +- ``celery.messaging`` + + Use :attr:`@amqp` instead. + +- ``celery.registry`` + + Use :mod:`celery.app.registry` instead. + +- ``celery.task.control`` + + Use :attr:`@control` instead. + +- ``celery.task.schedules`` + + Use :mod:`celery.schedules` instead. + +- ``celery.task.chords`` + + Use :func:`celery.chord` instead. 
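As noted under *Task attributes* above, routing options that used to be set as attributes on the task class now belong in the :setting:`CELERY_ROUTES` setting. A minimal sketch of the migration (the task name, queue and routing key are illustrative):

.. code-block:: python

    # Before: routing configured as attributes on the task class, e.g.
    #
    #     class AddTask(Task):
    #         queue = 'math'
    #         routing_key = 'math.add'
    #
    # After: the same options are given in the configuration instead.
    CELERY_ROUTES = {
        'proj.tasks.add': {
            'queue': 'math',
            'routing_key': 'math.add',
        },
    }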
+ +Settings +-------- + +``BROKER`` Settings +~~~~~~~~~~~~~~~~~~~ + +===================================== ===================================== +**Setting name** **Replace with** +===================================== ===================================== +``BROKER_HOST`` :setting:`BROKER_URL` +``BROKER_PORT`` :setting:`BROKER_URL` +``BROKER_USER`` :setting:`BROKER_URL` +``BROKER_PASSWORD`` :setting:`BROKER_URL` +``BROKER_VHOST`` :setting:`BROKER_URL` +===================================== ===================================== + + +``REDIS`` Result Backend Settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +===================================== ===================================== +**Setting name** **Replace with** +===================================== ===================================== +``CELERY_REDIS_HOST`` :setting:`CELERY_RESULT_BACKEND` +``CELERY_REDIS_PORT`` :setting:`CELERY_RESULT_BACKEND` +``CELERY_REDIS_DB`` :setting:`CELERY_RESULT_BACKEND` +``CELERY_REDIS_PASSWORD`` :setting:`CELERY_RESULT_BACKEND` +``REDIS_HOST`` :setting:`CELERY_RESULT_BACKEND` +``REDIS_PORT`` :setting:`CELERY_RESULT_BACKEND` +``REDIS_DB`` :setting:`CELERY_RESULT_BACKEND` +``REDIS_PASSWORD`` :setting:`CELERY_RESULT_BACKEND` +===================================== ===================================== + +Logging Settings +~~~~~~~~~~~~~~~~ + +===================================== ===================================== +**Setting name** **Replace with** +===================================== ===================================== +``CELERYD_LOG_LEVEL`` :option:`--loglevel` +``CELERYD_LOG_FILE`` :option:`--logfile`` +``CELERYBEAT_LOG_LEVEL`` :option:`--loglevel` +``CELERYBEAT_LOG_FILE`` :option:`--loglevel`` +``CELERYMON_LOG_LEVEL`` :option:`--loglevel` +``CELERYMON_LOG_FILE`` :option:`--loglevel`` +===================================== ===================================== + +Other Settings +~~~~~~~~~~~~~~ + +===================================== ===================================== +**Setting name** **Replace with** +===================================== ===================================== +``CELERY_TASK_ERROR_WITELIST`` Annotate ``Task.ErrorMail`` +``CELERY_AMQP_TASK_RESULT_EXPIRES`` :setting:`CELERY_TASK_RESULT_EXPIRES` +===================================== ===================================== + + +.. _deprecations-v2.0: + +Removals for version 2.0 +======================== + +* The following settings will be removed: + +===================================== ===================================== +**Setting name** **Replace with** +===================================== ===================================== +`CELERY_AMQP_CONSUMER_QUEUES` `CELERY_QUEUES` +`CELERY_AMQP_CONSUMER_QUEUES` `CELERY_QUEUES` +`CELERY_AMQP_EXCHANGE` `CELERY_DEFAULT_EXCHANGE` +`CELERY_AMQP_EXCHANGE_TYPE` `CELERY_DEFAULT_AMQP_EXCHANGE_TYPE` +`CELERY_AMQP_CONSUMER_ROUTING_KEY` `CELERY_QUEUES` +`CELERY_AMQP_PUBLISHER_ROUTING_KEY` `CELERY_DEFAULT_ROUTING_KEY` +===================================== ===================================== + +* :envvar:`CELERY_LOADER` definitions without class name. + + E.g. `celery.loaders.default`, needs to include the class name: + `celery.loaders.default.Loader`. + +* :meth:`TaskSet.run`. Use :meth:`celery.task.base.TaskSet.apply_async` + instead. + +* The module :mod:`celery.task.rest`; use :mod:`celery.task.http` instead. diff --git a/docs/internals/guide.rst b/docs/internals/guide.rst new file mode 100644 index 0000000..941f7b1 --- /dev/null +++ b/docs/internals/guide.rst @@ -0,0 +1,307 @@ +.. 
_internals-guide: + +================================ + Contributors Guide to the Code +================================ + +.. contents:: + :local: + +Philosophy +========== + +The API>RCP Precedence Rule +--------------------------- + +- The API is more important than Readability +- Readability is more important than Convention +- Convention is more important than Performance + - …unless the code is a proven hotspot. + +More important than anything else is the end-user API. +Conventions must step aside, and any suffering is always alleviated +if the end result is a better API. + +Conventions and Idioms Used +=========================== + +Classes +------- + +Naming +~~~~~~ + +- Follows :pep:`8`. + +- Class names must be `CamelCase`. +- but not if they are verbs, verbs shall be `lower_case`: + + .. code-block:: python + + # - test case for a class + class TestMyClass(Case): # BAD + pass + + class test_MyClass(Case): # GOOD + pass + + # - test case for a function + class TestMyFunction(Case): # BAD + pass + + class test_my_function(Case): # GOOD + pass + + # - "action" class (verb) + class UpdateTwitterStatus(object): # BAD + pass + + class update_twitter_status(object): # GOOD + pass + + .. note:: + + Sometimes it makes sense to have a class mask as a function, + and there is precedence for this in the stdlib (e.g. + :class:`~contextlib.contextmanager`). Celery examples include + :class:`~celery.subtask`, :class:`~celery.chord`, + ``inspect``, :class:`~kombu.utils.functional.promise` and more.. + +- Factory functions and methods must be `CamelCase` (excluding verbs): + + .. code-block:: python + + class Celery(object): + + def consumer_factory(self): # BAD + ... + + def Consumer(self): # GOOD + ... + +Default values +~~~~~~~~~~~~~~ + +Class attributes serve as default values for the instance, +as this means that they can be set by either instantiation or inheritance. + +**Example:** + +.. code-block:: python + + class Producer(object): + active = True + serializer = 'json' + + def __init__(self, serializer=None): + self.serializer = serializer or self.serializer + + # must check for None when value can be false-y + self.active = active if active is not None else self.active + +A subclass can change the default value: + +.. code-block:: python + + TaskProducer(Producer): + serializer = 'pickle' + +and the value can be set at instantiation: + +.. code-block:: python + + >>> producer = TaskProducer(serializer='msgpack') + +Exceptions +~~~~~~~~~~ + +Custom exceptions raised by an objects methods and properties +should be available as an attribute and documented in the +method/property that throw. + +This way a user doesn't have to find out where to import the +exception from, but rather use ``help(obj)`` and access +the exception class from the instance directly. + +**Example**: + +.. code-block:: python + + class Empty(Exception): + pass + + class Queue(object): + Empty = Empty + + def get(self): + """Get the next item from the queue. + + :raises Queue.Empty: if there are no more items left. + + """ + try: + return self.queue.popleft() + except IndexError: + raise self.Empty() + +Composites +~~~~~~~~~~ + +Similarly to exceptions, composite classes should be override-able by +inheritance and/or instantiation. Common sense can be used when +selecting what classes to include, but often it's better to add one +too many: predicting what users need to override is hard (this has +saved us from many a monkey patch). + +**Example**: + +.. 
code-block:: python + + class Worker(object): + Consumer = Consumer + + def __init__(self, connection, consumer_cls=None): + self.Consumer = consumer_cls or self.Consumer + + def do_work(self): + with self.Consumer(self.connection) as consumer: + self.connection.drain_events() + +Applications vs. "single mode" +============================== + +In the beginning Celery was developed for Django, simply because +this enabled us get the project started quickly, while also having +a large potential user base. + +In Django there is a global settings object, so multiple Django projects +can't co-exist in the same process space, this later posed a problem +for using Celery with frameworks that doesn't have this limitation. + +Therefore the app concept was introduced. When using apps you use 'celery' +objects instead of importing things from celery submodules, this +(unfortunately) also means that Celery essentially has two API's. + +Here's an example using Celery in single-mode: + +.. code-block:: python + + from celery import task + from celery.task.control import inspect + + from .models import CeleryStats + + @task + def write_stats_to_db(): + stats = inspect().stats(timeout=1) + for node_name, reply in stats: + CeleryStats.objects.update_stat(node_name, stats) + + +and here's the same using Celery app objects: + +.. code-block:: python + + from .celery import celery + from .models import CeleryStats + + @app.task + def write_stats_to_db(): + stats = celery.control.inspect().stats(timeout=1) + for node_name, reply in stats: + CeleryStats.objects.update_stat(node_name, stats) + + +In the example above the actual application instance is imported +from a module in the project, this module could look something like this: + +.. code-block:: python + + from celery import Celery + + app = Celery(broker='amqp://') + + +Module Overview +=============== + +- celery.app + + This is the core of Celery: the entry-point for all functionality. + +- celery.loaders + + Every app must have a loader. The loader decides how configuration + is read, what happens when the worker starts, when a task starts and ends, + and so on. + + The loaders included are: + + - app + + Custom celery app instances uses this loader by default. + + - default + + "single-mode" uses this loader by default. + + Extension loaders also exist, like ``django-celery``, ``celery-pylons`` + and so on. + +- celery.worker + + This is the worker implementation. + +- celery.backends + + Task result backends live here. + +- celery.apps + + Major user applications: worker and beat. + The command-line wrappers for these are in celery.bin (see below) + +- celery.bin + + Command-line applications. + setup.py creates setuptools entrypoints for these. + +- celery.concurrency + + Execution pool implementations (prefork, eventlet, gevent, threads). + +- celery.db + + Database models for the SQLAlchemy database result backend. + (should be moved into :mod:`celery.backends.database`) + +- celery.events + + Sending and consuming monitoring events, also includes curses monitor, + event dumper and utilities to work with in-memory cluster state. + +- celery.execute.trace + + How tasks are executed and traced by the worker, and in eager mode. + +- celery.security + + Security related functionality, currently a serializer using + cryptographic digests. + +- celery.task + + single-mode interface to creating tasks, and controlling workers. + +- celery.tests + + The unittest suite. + +- celery.utils + + Utility functions used by the celery code base. 
+ Much of it is there to be compatible across Python versions. + +- celery.contrib + + Additional public code that doesn't fit into any other namespace. diff --git a/docs/internals/index.rst b/docs/internals/index.rst new file mode 100644 index 0000000..d10ed01 --- /dev/null +++ b/docs/internals/index.rst @@ -0,0 +1,19 @@ +.. _internals: + +=========== + Internals +=========== + +:Release: |version| +:Date: |today| + +.. toctree:: + :maxdepth: 2 + + guide + deprecation + worker + protocol + protov2 + app-overview + reference/index diff --git a/docs/internals/protocol.rst b/docs/internals/protocol.rst new file mode 100644 index 0000000..f80e6e8 --- /dev/null +++ b/docs/internals/protocol.rst @@ -0,0 +1,148 @@ +.. _internals-task-message-protocol: + +.. _task-message-protocol-v1: + +======================= + Task Messages +======================= + +.. contents:: + :local: + +Message format +============== + +* task + :`string`: + + Name of the task. **required** + +* id + :`string`: + + Unique id of the task (UUID). **required** + +* args + :`list`: + + List of arguments. Will be an empty list if not provided. + +* kwargs + :`dictionary`: + + Dictionary of keyword arguments. Will be an empty dictionary if not + provided. + +* retries + :`int`: + + Current number of times this task has been retried. + Defaults to `0` if not specified. + +* eta + :`string` (ISO 8601): + + Estimated time of arrival. This is the date and time in ISO 8601 + format. If not provided the message is not scheduled, but will be + executed asap. + +* expires + :`string` (ISO 8601): + + .. versionadded:: 2.0.2 + + Expiration date. This is the date and time in ISO 8601 format. + If not provided the message will never expire. The message + will be expired when the message is received and the expiration date + has been exceeded. + +Extensions +========== + +Extensions are additional keys in the message body that the worker may or +may not support. If the worker finds an extension key it doesn't support +it should optimally reject the message so another worker gets a chance +to process it. + + +* taskset + :`string`: + + The taskset this task is part of (if any). + +* chord + :`subtask`: + + .. versionadded:: 2.3 + + Signifies that this task is one of the header parts of a chord. The value + of this key is the body of the cord that should be executed when all of + the tasks in the header has returned. + +* utc + :`bool`: + + .. versionadded:: 2.5 + + If true time uses the UTC timezone, if not the current local timezone + should be used. + +* callbacks + :`subtask`: + + .. versionadded:: 3.0 + + A list of subtasks to apply if the task exited successfully. + +* errbacks + :`subtask`: + + .. versionadded:: 3.0 + + A list of subtasks to apply if an error occurs while executing the task. + +* timelimit + :`(float, float)`: + + .. versionadded:: 3.1 + + Task execution time limit settings. This is a tuple of hard and soft time + limit value (`int`/`float` or :const:`None` for no limit). + + Example value specifying a soft time limit of 3 seconds, and a hard time + limt of 10 seconds:: + + {'timelimit': (3.0, 10.0)} + + +Example message +=============== + +This is an example invocation of the `celery.task.PingTask` task in JSON +format: + +.. 
code-block:: javascript + + {"id": "4cc7438e-afd4-4f8f-a2f3-f46567e7ca77", + "task": "celery.task.PingTask", + "args": [], + "kwargs": {}, + "retries": 0, + "eta": "2009-11-17T12:30:56.527191"} + +Serialization +============= + +Several types of serialization formats are supported using the +`content_type` message header. + +The MIME-types supported by default are shown in the following table. + + =============== ================================= + Scheme MIME Type + =============== ================================= + json application/json + yaml application/x-yaml + pickle application/x-python-serialize + msgpack application/x-msgpack + =============== ================================= diff --git a/docs/internals/protov2.rst b/docs/internals/protov2.rst new file mode 100644 index 0000000..e0bb1ff --- /dev/null +++ b/docs/internals/protov2.rst @@ -0,0 +1,146 @@ +.. _protov2draft: + +======================================== + Task Message Protocol v2 (Draft Spec.) +======================================== + +Notes +===== + +- Support for multiple languages via the ``lang`` header. + + Worker may redirect the message to a worker that supports + the language. + +- Metadata moved to headers. + + This means that workers/intermediates can inspect the message + and make decisions based on the headers without decoding + the payload (which may be language specific, e.g. serialized by the + Python specific pickle serializer). + +- Body is only for language specific data. + + - Python stores args/kwargs in body. + + - If a message uses raw encoding then the raw data + will be passed as a single argument to the function. + + - Java/C, etc. can use a thrift/protobuf document as the body + +- Dispatches to actor based on ``c_type``, ``c_meth`` headers + + ``c_meth`` is unused by python, but may be used in the future + to specify class+method pairs. + +- Chain gains a dedicated field. + + Reducing the chain into a recursive ``callbacks`` argument + causes problems when the recursion limit is exceeded. + + This is fixed in the new message protocol by specifying + a list of signatures, each task will then pop a task off the list + when sending the next message:: + + execute_task(message) + chain = message.headers['chain'] + if chain: + sig = maybe_signature(chain.pop()) + sig.apply_async(chain=chain) + +- ``correlation_id`` replaces ``task_id`` field. + + +- ``c_shadow`` lets you specify a different name for logs, monitors + can be used for e.g. meta tasks that calls any function:: + + from celery.utils.imports import qualname + + class PickleTask(Task): + abstract = True + + def unpack_args(self, fun, args=()): + return fun, args + + def apply_async(self, args, kwargs, **options): + fun, real_args = self.unpack_args(*args) + return super(PickleTask, self).apply_async( + (fun, real_args, kwargs), shadow=qualname(fun), **options + ) + + @app.task(base=PickleTask) + def call(fun, args, kwargs): + return fun(*args, **kwargs) + + + +Undecided +--------- + +- May consider moving callbacks/errbacks/chain into body. + + Will huge lists in headers cause overhead? + The downside of keeping them in the body is that intermediates + won't be able to introspect these values. + +Definition +========== + +.. 
code-block:: python + + # protocol v2 implies UTC=True + # 'class' header existing means protocol is v2 + + properties = { + 'correlation_id': (uuid)task_id, + 'content_type': (string)mime, + 'content_encoding': (string)encoding, + + # optional + 'reply_to': (string)queue_or_url, + } + headers = { + 'lang': (string)'py' + 'c_type': (string)task, + + # optional + 'c_meth': (string)unused, + 'c_shadow': (string)replace_name, + 'eta': (iso8601)eta, + 'expires'; (iso8601)expires, + 'callbacks': (list)Signature, + 'errbacks': (list)Signature, + 'chain': (list)Signature, # non-recursive, reversed list of signatures + 'group': (uuid)group_id, + 'chord': (uuid)chord_id, + 'retries': (int)retries, + 'timelimit': (tuple)(soft, hard), + } + + body = (args, kwargs) + +Example +======= + +.. code-block:: python + + # chain: add(add(add(2, 2), 4), 8) == 2 + 2 + 4 + 8 + + task_id = uuid() + basic_publish( + message=json.dumps([[2, 2], {}]), + application_headers={ + 'lang': 'py', + 'c_type': 'proj.tasks.add', + 'chain': [ + # reversed chain list + {'task': 'proj.tasks.add', 'args': (8, )}, + {'task': 'proj.tasks.add', 'args': (4, )}, + ] + } + properties={ + 'correlation_id': task_id, + 'content_type': 'application/json', + 'content_encoding': 'utf-8', + } + ) diff --git a/docs/internals/reference/celery._state.rst b/docs/internals/reference/celery._state.rst new file mode 100644 index 0000000..658a2b7 --- /dev/null +++ b/docs/internals/reference/celery._state.rst @@ -0,0 +1,11 @@ +======================================== + celery._state +======================================== + +.. contents:: + :local: +.. currentmodule:: celery._state + +.. automodule:: celery._state + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.app.annotations.rst b/docs/internals/reference/celery.app.annotations.rst new file mode 100644 index 0000000..ff9966e --- /dev/null +++ b/docs/internals/reference/celery.app.annotations.rst @@ -0,0 +1,11 @@ +========================================== + celery.app.annotations +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.app.annotations + +.. automodule:: celery.app.annotations + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.app.routes.rst b/docs/internals/reference/celery.app.routes.rst new file mode 100644 index 0000000..7a1cca6 --- /dev/null +++ b/docs/internals/reference/celery.app.routes.rst @@ -0,0 +1,11 @@ +================================= + celery.app.routes +================================= + +.. contents:: + :local: +.. currentmodule:: celery.app.routes + +.. automodule:: celery.app.routes + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.app.trace.rst b/docs/internals/reference/celery.app.trace.rst new file mode 100644 index 0000000..92b5fe0 --- /dev/null +++ b/docs/internals/reference/celery.app.trace.rst @@ -0,0 +1,11 @@ +========================================== + celery.app.trace +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.app.trace + +.. automodule:: celery.app.trace + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.amqp.rst b/docs/internals/reference/celery.backends.amqp.rst new file mode 100644 index 0000000..6e7b6ac --- /dev/null +++ b/docs/internals/reference/celery.backends.amqp.rst @@ -0,0 +1,11 @@ +======================================= + celery.backends.amqp +======================================= + +.. contents:: + :local: +.. 
currentmodule:: celery.backends.amqp + +.. automodule:: celery.backends.amqp + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.base.rst b/docs/internals/reference/celery.backends.base.rst new file mode 100644 index 0000000..dfbee0f --- /dev/null +++ b/docs/internals/reference/celery.backends.base.rst @@ -0,0 +1,13 @@ +===================================== + celery.backends.base +===================================== + +.. contents:: + :local: +.. currentmodule:: celery.backends.base + +.. automodule:: celery.backends.base + :members: + :undoc-members: + + diff --git a/docs/internals/reference/celery.backends.cache.rst b/docs/internals/reference/celery.backends.cache.rst new file mode 100644 index 0000000..7df684c --- /dev/null +++ b/docs/internals/reference/celery.backends.cache.rst @@ -0,0 +1,11 @@ +=========================================== + celery.backends.cache +=========================================== + +.. contents:: + :local: +.. currentmodule:: celery.backends.cache + +.. automodule:: celery.backends.cache + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.cassandra.rst b/docs/internals/reference/celery.backends.cassandra.rst new file mode 100644 index 0000000..7c8f2bf --- /dev/null +++ b/docs/internals/reference/celery.backends.cassandra.rst @@ -0,0 +1,11 @@ +================================================ + celery.backends.cassandra +================================================ + +.. contents:: + :local: +.. currentmodule:: celery.backends.cassandra + +.. automodule:: celery.backends.cassandra + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.couchbase.rst b/docs/internals/reference/celery.backends.couchbase.rst new file mode 100644 index 0000000..43afc42 --- /dev/null +++ b/docs/internals/reference/celery.backends.couchbase.rst @@ -0,0 +1,11 @@ +============================================ + celery.backends.couchbase +============================================ + +.. contents:: + :local: +.. currentmodule:: celery.backends.couchbase + +.. automodule:: celery.backends.couchbase + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.database.models.rst b/docs/internals/reference/celery.backends.database.models.rst new file mode 100644 index 0000000..fa50c5d --- /dev/null +++ b/docs/internals/reference/celery.backends.database.models.rst @@ -0,0 +1,11 @@ +====================================== + celery.backends.database.models +====================================== + +.. contents:: + :local: +.. currentmodule:: celery.backends.database.models + +.. automodule:: celery.backends.database.models + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.database.rst b/docs/internals/reference/celery.backends.database.rst new file mode 100644 index 0000000..eeb0e5f --- /dev/null +++ b/docs/internals/reference/celery.backends.database.rst @@ -0,0 +1,11 @@ +========================================================= + celery.backends.database +========================================================= + +.. contents:: + :local: +.. currentmodule:: celery.backends.database + +.. 
automodule:: celery.backends.database + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.database.session.rst b/docs/internals/reference/celery.backends.database.session.rst new file mode 100644 index 0000000..e6fc71b --- /dev/null +++ b/docs/internals/reference/celery.backends.database.session.rst @@ -0,0 +1,11 @@ +======================================== + celery.backends.database.session +======================================== + +.. contents:: + :local: +.. currentmodule:: celery.backends.database.session + +.. automodule:: celery.backends.database.session + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.mongodb.rst b/docs/internals/reference/celery.backends.mongodb.rst new file mode 100644 index 0000000..2b3f243 --- /dev/null +++ b/docs/internals/reference/celery.backends.mongodb.rst @@ -0,0 +1,11 @@ +============================================ + celery.backends.mongodb +============================================ + +.. contents:: + :local: +.. currentmodule:: celery.backends.mongodb + +.. automodule:: celery.backends.mongodb + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.redis.rst b/docs/internals/reference/celery.backends.redis.rst new file mode 100644 index 0000000..8fcd602 --- /dev/null +++ b/docs/internals/reference/celery.backends.redis.rst @@ -0,0 +1,11 @@ +========================================== + celery.backends.redis +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.backends.redis + +.. automodule:: celery.backends.redis + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.rpc.rst b/docs/internals/reference/celery.backends.rpc.rst new file mode 100644 index 0000000..3eb0948 --- /dev/null +++ b/docs/internals/reference/celery.backends.rpc.rst @@ -0,0 +1,11 @@ +======================================= + celery.backends.rpc +======================================= + +.. contents:: + :local: +.. currentmodule:: celery.backends.rpc + +.. automodule:: celery.backends.rpc + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.backends.rst b/docs/internals/reference/celery.backends.rst new file mode 100644 index 0000000..c9b4f18 --- /dev/null +++ b/docs/internals/reference/celery.backends.rst @@ -0,0 +1,11 @@ +=========================== + celery.backends +=========================== + +.. contents:: + :local: +.. currentmodule:: celery.backends + +.. automodule:: celery.backends + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.concurrency.base.rst b/docs/internals/reference/celery.concurrency.base.rst new file mode 100644 index 0000000..7e30189 --- /dev/null +++ b/docs/internals/reference/celery.concurrency.base.rst @@ -0,0 +1,11 @@ +=============================================== + celery.concurrency.base +=============================================== + +.. contents:: + :local: +.. currentmodule:: celery.concurrency.base + +.. automodule:: celery.concurrency.base + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.concurrency.eventlet.rst b/docs/internals/reference/celery.concurrency.eventlet.rst new file mode 100644 index 0000000..1833064 --- /dev/null +++ b/docs/internals/reference/celery.concurrency.eventlet.rst @@ -0,0 +1,11 @@ +============================================================= + celery.concurrency.eventlet +============================================================= + +.. contents:: + :local: +.. 
currentmodule:: celery.concurrency.eventlet + +.. automodule:: celery.concurrency.eventlet + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.concurrency.gevent.rst b/docs/internals/reference/celery.concurrency.gevent.rst new file mode 100644 index 0000000..21d122f --- /dev/null +++ b/docs/internals/reference/celery.concurrency.gevent.rst @@ -0,0 +1,11 @@ +============================================================= + celery.concurrency.gevent† (*experimental*) +============================================================= + +.. contents:: + :local: +.. currentmodule:: celery.concurrency.gevent + +.. automodule:: celery.concurrency.gevent + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.concurrency.prefork.rst b/docs/internals/reference/celery.concurrency.prefork.rst new file mode 100644 index 0000000..864048f --- /dev/null +++ b/docs/internals/reference/celery.concurrency.prefork.rst @@ -0,0 +1,11 @@ +============================================================= + celery.concurrency.prefork +============================================================= + +.. contents:: + :local: +.. currentmodule:: celery.concurrency.prefork + +.. automodule:: celery.concurrency.prefork + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.concurrency.rst b/docs/internals/reference/celery.concurrency.rst new file mode 100644 index 0000000..3e84c14 --- /dev/null +++ b/docs/internals/reference/celery.concurrency.rst @@ -0,0 +1,11 @@ +================================== + celery.concurrency +================================== + +.. contents:: + :local: +.. currentmodule:: celery.concurrency + +.. automodule:: celery.concurrency + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.concurrency.solo.rst b/docs/internals/reference/celery.concurrency.solo.rst new file mode 100644 index 0000000..cda0769 --- /dev/null +++ b/docs/internals/reference/celery.concurrency.solo.rst @@ -0,0 +1,11 @@ +=================================================================== + celery.concurrency.solo +=================================================================== + +.. contents:: + :local: +.. currentmodule:: celery.concurrency.solo + +.. automodule:: celery.concurrency.solo + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.concurrency.threads.rst b/docs/internals/reference/celery.concurrency.threads.rst new file mode 100644 index 0000000..663d1fc --- /dev/null +++ b/docs/internals/reference/celery.concurrency.threads.rst @@ -0,0 +1,11 @@ +=================================================================== + celery.concurrency.threads‡ (**minefield**) +=================================================================== + +.. contents:: + :local: +.. currentmodule:: celery.concurrency.threads + +.. automodule:: celery.concurrency.threads + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.datastructures.rst b/docs/internals/reference/celery.datastructures.rst new file mode 100644 index 0000000..bee31b3 --- /dev/null +++ b/docs/internals/reference/celery.datastructures.rst @@ -0,0 +1,49 @@ +.. currentmodule:: celery.datastructures + +.. automodule:: celery.datastructures + + .. contents:: + :local: + + AttributeDict + ------------- + + .. autoclass:: AttributeDict + :members: + + .. autoclass:: AttributeDictMixin + :members: + + DictAttribute + ------------- + + .. autoclass:: DictAttribute + :members: + :undoc-members: + + ConfigurationView + ----------------- + + .. 
autoclass:: ConfigurationView + :members: + :undoc-members: + + ExceptionInfo + ------------- + + .. autoclass:: ExceptionInfo + :members: + + LimitedSet + ---------- + + .. autoclass:: LimitedSet + :members: + :undoc-members: + + LRUCache + -------- + + .. autoclass:: LRUCache + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.events.cursesmon.rst b/docs/internals/reference/celery.events.cursesmon.rst new file mode 100644 index 0000000..7f6d050 --- /dev/null +++ b/docs/internals/reference/celery.events.cursesmon.rst @@ -0,0 +1,11 @@ +========================================== + celery.events.cursesmon +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.events.cursesmon + +.. automodule:: celery.events.cursesmon + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.events.dumper.rst b/docs/internals/reference/celery.events.dumper.rst new file mode 100644 index 0000000..f1fe106 --- /dev/null +++ b/docs/internals/reference/celery.events.dumper.rst @@ -0,0 +1,11 @@ +========================================== + celery.events.dumper +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.events.dumper + +.. automodule:: celery.events.dumper + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.events.snapshot.rst b/docs/internals/reference/celery.events.snapshot.rst new file mode 100644 index 0000000..906b19f --- /dev/null +++ b/docs/internals/reference/celery.events.snapshot.rst @@ -0,0 +1,11 @@ +========================================== + celery.events.snapshot +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.events.snapshot + +.. automodule:: celery.events.snapshot + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.platforms.rst b/docs/internals/reference/celery.platforms.rst new file mode 100644 index 0000000..d83760b --- /dev/null +++ b/docs/internals/reference/celery.platforms.rst @@ -0,0 +1,11 @@ +====================================== + celery.platforms +====================================== + +.. contents:: + :local: +.. currentmodule:: celery.platforms + +.. automodule:: celery.platforms + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.security.certificate.rst b/docs/internals/reference/celery.security.certificate.rst new file mode 100644 index 0000000..6763a1f --- /dev/null +++ b/docs/internals/reference/celery.security.certificate.rst @@ -0,0 +1,11 @@ +========================================== + celery.security.certificate +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.security.certificate + +.. automodule:: celery.security.certificate + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.security.key.rst b/docs/internals/reference/celery.security.key.rst new file mode 100644 index 0000000..0c2ba57 --- /dev/null +++ b/docs/internals/reference/celery.security.key.rst @@ -0,0 +1,11 @@ +========================================== + celery.security.key +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.security.key + +.. 
automodule:: celery.security.key + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.security.serialization.rst b/docs/internals/reference/celery.security.serialization.rst new file mode 100644 index 0000000..f234994 --- /dev/null +++ b/docs/internals/reference/celery.security.serialization.rst @@ -0,0 +1,11 @@ +========================================== + celery.security.serialization +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.security.serialization + +.. automodule:: celery.security.serialization + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.security.utils.rst b/docs/internals/reference/celery.security.utils.rst new file mode 100644 index 0000000..2837cf9 --- /dev/null +++ b/docs/internals/reference/celery.security.utils.rst @@ -0,0 +1,11 @@ +========================================== + celery.security.utils +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.security.utils + +.. automodule:: celery.security.utils + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.compat.rst b/docs/internals/reference/celery.utils.compat.rst new file mode 100644 index 0000000..851851f --- /dev/null +++ b/docs/internals/reference/celery.utils.compat.rst @@ -0,0 +1,11 @@ +============================================ + celery.utils.compat +============================================ + +.. contents:: + :local: +.. currentmodule:: celery.utils.compat + +.. automodule:: celery.utils.compat + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.dispatch.rst b/docs/internals/reference/celery.utils.dispatch.rst new file mode 100644 index 0000000..e60bc08 --- /dev/null +++ b/docs/internals/reference/celery.utils.dispatch.rst @@ -0,0 +1,11 @@ +========================================= + celery.utils.dispatch +========================================= + +.. contents:: + :local: +.. currentmodule:: celery.utils.dispatch + +.. automodule:: celery.utils.dispatch + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.dispatch.saferef.rst b/docs/internals/reference/celery.utils.dispatch.saferef.rst new file mode 100644 index 0000000..78b79b9 --- /dev/null +++ b/docs/internals/reference/celery.utils.dispatch.saferef.rst @@ -0,0 +1,11 @@ +========================================================== + celery.utils.dispatch.saferef +========================================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.dispatch.saferef + +.. automodule:: celery.utils.dispatch.saferef + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.dispatch.signal.rst b/docs/internals/reference/celery.utils.dispatch.signal.rst new file mode 100644 index 0000000..5c19b73 --- /dev/null +++ b/docs/internals/reference/celery.utils.dispatch.signal.rst @@ -0,0 +1,11 @@ +==================================================== + celery.utils.dispatch.signal +==================================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.dispatch.signal + +.. 
automodule:: celery.utils.dispatch.signal + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.functional.rst b/docs/internals/reference/celery.utils.functional.rst new file mode 100644 index 0000000..727f781 --- /dev/null +++ b/docs/internals/reference/celery.utils.functional.rst @@ -0,0 +1,11 @@ +===================================================== + celery.utils.functional +===================================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.functional + +.. automodule:: celery.utils.functional + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.imports.rst b/docs/internals/reference/celery.utils.imports.rst new file mode 100644 index 0000000..e16d264 --- /dev/null +++ b/docs/internals/reference/celery.utils.imports.rst @@ -0,0 +1,11 @@ +===================================================== + celery.utils.imports +===================================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.imports + +.. automodule:: celery.utils.imports + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.iso8601.rst b/docs/internals/reference/celery.utils.iso8601.rst new file mode 100644 index 0000000..55fb0a2 --- /dev/null +++ b/docs/internals/reference/celery.utils.iso8601.rst @@ -0,0 +1,11 @@ +================================================== + celery.utils.iso8601 +================================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.iso8601 + +.. automodule:: celery.utils.iso8601 + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.log.rst b/docs/internals/reference/celery.utils.log.rst new file mode 100644 index 0000000..6970f35 --- /dev/null +++ b/docs/internals/reference/celery.utils.log.rst @@ -0,0 +1,11 @@ +===================================================== + celery.utils.log +===================================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.log + +.. automodule:: celery.utils.log + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.objects.rst b/docs/internals/reference/celery.utils.objects.rst new file mode 100644 index 0000000..845f161 --- /dev/null +++ b/docs/internals/reference/celery.utils.objects.rst @@ -0,0 +1,11 @@ +================================================== + celery.utils.objects +================================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.objects + +.. automodule:: celery.utils.objects + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.rst b/docs/internals/reference/celery.utils.rst new file mode 100644 index 0000000..3662e70 --- /dev/null +++ b/docs/internals/reference/celery.utils.rst @@ -0,0 +1,11 @@ +========================== + celery.utils +========================== + +.. contents:: + :local: +.. currentmodule:: celery.utils + +.. automodule:: celery.utils + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.serialization.rst b/docs/internals/reference/celery.utils.serialization.rst new file mode 100644 index 0000000..9d298e5 --- /dev/null +++ b/docs/internals/reference/celery.utils.serialization.rst @@ -0,0 +1,11 @@ +============================================ + celery.utils.serialization +============================================ + +.. contents:: + :local: +.. currentmodule:: celery.utils.serialization + +.. 
automodule:: celery.utils.serialization + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.sysinfo.rst b/docs/internals/reference/celery.utils.sysinfo.rst new file mode 100644 index 0000000..ab6f9fd --- /dev/null +++ b/docs/internals/reference/celery.utils.sysinfo.rst @@ -0,0 +1,11 @@ +================================================== + celery.utils.sysinfo +================================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.sysinfo + +.. automodule:: celery.utils.sysinfo + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.term.rst b/docs/internals/reference/celery.utils.term.rst new file mode 100644 index 0000000..555e96f --- /dev/null +++ b/docs/internals/reference/celery.utils.term.rst @@ -0,0 +1,11 @@ +===================================================== + celery.utils.term +===================================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.term + +.. automodule:: celery.utils.term + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.text.rst b/docs/internals/reference/celery.utils.text.rst new file mode 100644 index 0000000..31f7440 --- /dev/null +++ b/docs/internals/reference/celery.utils.text.rst @@ -0,0 +1,11 @@ +===================================================== + celery.utils.text +===================================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.text + +.. automodule:: celery.utils.text + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.threads.rst b/docs/internals/reference/celery.utils.threads.rst new file mode 100644 index 0000000..32da5da --- /dev/null +++ b/docs/internals/reference/celery.utils.threads.rst @@ -0,0 +1,11 @@ +========================================== + celery.utils.threads +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.threads + +.. automodule:: celery.utils.threads + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.timer2.rst b/docs/internals/reference/celery.utils.timer2.rst new file mode 100644 index 0000000..d4d4af5 --- /dev/null +++ b/docs/internals/reference/celery.utils.timer2.rst @@ -0,0 +1,11 @@ +============================== + celery.utils.timer2 +============================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.timer2 + +.. automodule:: celery.utils.timer2 + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.utils.timeutils.rst b/docs/internals/reference/celery.utils.timeutils.rst new file mode 100644 index 0000000..080a642 --- /dev/null +++ b/docs/internals/reference/celery.utils.timeutils.rst @@ -0,0 +1,11 @@ +================================================== + celery.utils.timeutils +================================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.timeutils + +.. automodule:: celery.utils.timeutils + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.worker.autoreload.rst b/docs/internals/reference/celery.worker.autoreload.rst new file mode 100644 index 0000000..63b17e7 --- /dev/null +++ b/docs/internals/reference/celery.worker.autoreload.rst @@ -0,0 +1,11 @@ +==================================== + celery.worker.autoreload +==================================== + +.. contents:: + :local: +.. currentmodule:: celery.worker.autoreload + +.. 
automodule:: celery.worker.autoreload + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.worker.autoscale.rst b/docs/internals/reference/celery.worker.autoscale.rst new file mode 100644 index 0000000..f3e7af7 --- /dev/null +++ b/docs/internals/reference/celery.worker.autoscale.rst @@ -0,0 +1,11 @@ +======================================== + celery.worker.autoscale +======================================== + +.. contents:: + :local: +.. currentmodule:: celery.worker.autoscale + +.. automodule:: celery.worker.autoscale + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.worker.components.rst b/docs/internals/reference/celery.worker.components.rst new file mode 100644 index 0000000..7757c56 --- /dev/null +++ b/docs/internals/reference/celery.worker.components.rst @@ -0,0 +1,11 @@ +======================================== + celery.worker.components +======================================== + +.. contents:: + :local: +.. currentmodule:: celery.worker.components + +.. automodule:: celery.worker.components + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.worker.control.rst b/docs/internals/reference/celery.worker.control.rst new file mode 100644 index 0000000..c6bf770 --- /dev/null +++ b/docs/internals/reference/celery.worker.control.rst @@ -0,0 +1,11 @@ +============================================= + celery.worker.control +============================================= + +.. contents:: + :local: +.. currentmodule:: celery.worker.control + +.. automodule:: celery.worker.control + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.worker.heartbeat.rst b/docs/internals/reference/celery.worker.heartbeat.rst new file mode 100644 index 0000000..184c11b --- /dev/null +++ b/docs/internals/reference/celery.worker.heartbeat.rst @@ -0,0 +1,11 @@ +============================================= + celery.worker.heartbeat +============================================= + +.. contents:: + :local: +.. currentmodule:: celery.worker.heartbeat + +.. automodule:: celery.worker.heartbeat + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.worker.loops.rst b/docs/internals/reference/celery.worker.loops.rst new file mode 100644 index 0000000..0535afb --- /dev/null +++ b/docs/internals/reference/celery.worker.loops.rst @@ -0,0 +1,11 @@ +==================================== + celery.worker.loops +==================================== + +.. contents:: + :local: +.. currentmodule:: celery.worker.loops + +.. automodule:: celery.worker.loops + :members: + :undoc-members: diff --git a/docs/internals/reference/celery.worker.pidbox.rst b/docs/internals/reference/celery.worker.pidbox.rst new file mode 100644 index 0000000..53c3dc0 --- /dev/null +++ b/docs/internals/reference/celery.worker.pidbox.rst @@ -0,0 +1,11 @@ +==================================== + celery.worker.pidbox +==================================== + +.. contents:: + :local: +.. currentmodule:: celery.worker.pidbox + +.. automodule:: celery.worker.pidbox + :members: + :undoc-members: diff --git a/docs/internals/reference/index.rst b/docs/internals/reference/index.rst new file mode 100644 index 0000000..31b6061 --- /dev/null +++ b/docs/internals/reference/index.rst @@ -0,0 +1,66 @@ +=========================== + Internal Module Reference +=========================== + +:Release: |version| +:Date: |today| + +.. 
toctree:: + :maxdepth: 1 + + celery.worker.components + celery.worker.loops + celery.worker.heartbeat + celery.worker.control + celery.worker.pidbox + celery.worker.autoreload + celery.worker.autoscale + celery.concurrency + celery.concurrency.solo + celery.concurrency.prefork + celery.concurrency.eventlet + celery.concurrency.gevent + celery.concurrency.base + celery.concurrency.threads + celery.backends + celery.backends.base + celery.backends.rpc + celery.backends.database + celery.backends.cache + celery.backends.amqp + celery.backends.mongodb + celery.backends.redis + celery.backends.cassandra + celery.backends.couchbase + celery.app.trace + celery.app.annotations + celery.app.routes + celery.datastructures + celery.security.certificate + celery.security.key + celery.security.serialization + celery.security.utils + celery.events.snapshot + celery.events.cursesmon + celery.events.dumper + celery.backends.database.models + celery.backends.database.session + celery.utils + celery.utils.functional + celery.utils.objects + celery.utils.term + celery.utils.timeutils + celery.utils.iso8601 + celery.utils.compat + celery.utils.serialization + celery.utils.sysinfo + celery.utils.threads + celery.utils.timer2 + celery.utils.imports + celery.utils.log + celery.utils.text + celery.utils.dispatch + celery.utils.dispatch.signal + celery.utils.dispatch.saferef + celery.platforms + celery._state diff --git a/docs/internals/worker.rst b/docs/internals/worker.rst new file mode 100644 index 0000000..30eb641 --- /dev/null +++ b/docs/internals/worker.rst @@ -0,0 +1,56 @@ +.. _internals-worker: + +======================= + Internals: The worker +======================= + +.. contents:: + :local: + +Introduction +============ + +The worker consists of 4 main components: the consumer, the scheduler, +the mediator and the task pool. All these components run in parallel, working +with two data structures: the ready queue and the ETA schedule. + +Data structures +=============== + +timer +----- + +The timer uses :mod:`heapq` to schedule internal functions. +It's very efficient and can handle hundreds of thousands of entries. + + +Components +========== + +Consumer +-------- + +Receives messages from the broker using `Kombu`_. + +.. _`Kombu`: http://pypi.python.org/pypi/kombu + +When a message is received it's converted into a +:class:`celery.worker.job.TaskRequest` object. + +Tasks with an ETA or rate limit are entered into the `timer`; +messages that can be immediately processed are sent to the execution pool. + +Timer +----- + +The timer schedules internal functions, like cleanup and internal monitoring, +but it also schedules ETA tasks and rate-limited tasks. +If a scheduled task's ETA has passed, it is moved to the execution pool. + +TaskPool +-------- + +This is a slightly modified :class:`multiprocessing.Pool`. +It mostly works the same way, except it makes sure all of the workers +are running at all times. If a worker is missing, it replaces +it with a new one. diff --git a/docs/reference/celery.app.amqp.rst b/docs/reference/celery.app.amqp.rst new file mode 100644 index 0000000..4675528 --- /dev/null +++ b/docs/reference/celery.app.amqp.rst @@ -0,0 +1,50 @@ +.. currentmodule:: celery.app.amqp + +.. automodule:: celery.app.amqp + + .. contents:: + :local: + + AMQP + ---- + + .. autoclass:: AMQP + + .. attribute:: Connection + + Broker connection class used. Default is + :class:`kombu.Connection`. + + .. attribute:: Consumer + + Base Consumer class used. Default is :class:`kombu.compat.Consumer`. + + ..
attribute:: queues + + All currently defined task queues. (A :class:`Queues` instance). + + .. automethod:: Queues + .. automethod:: Router + .. autoattribute:: TaskConsumer + .. autoattribute:: TaskProducer + .. automethod:: flush_routes + + .. autoattribute:: default_queue + .. autoattribute:: default_exchange + .. autoattribute:: publisher_pool + .. autoattribute:: router + .. autoattribute:: routes + + Queues + ------ + + .. autoclass:: Queues + :members: + :undoc-members: + + TaskPublisher + ------------- + + .. autoclass:: TaskPublisher + :members: + :undoc-members: diff --git a/docs/reference/celery.app.builtins.rst b/docs/reference/celery.app.builtins.rst new file mode 100644 index 0000000..6c6846d --- /dev/null +++ b/docs/reference/celery.app.builtins.rst @@ -0,0 +1,11 @@ +==================================================== + celery.app.builtins +==================================================== + +.. contents:: + :local: +.. currentmodule:: celery.app.builtins + +.. automodule:: celery.app.builtins + :members: + :undoc-members: diff --git a/docs/reference/celery.app.control.rst b/docs/reference/celery.app.control.rst new file mode 100644 index 0000000..106739e --- /dev/null +++ b/docs/reference/celery.app.control.rst @@ -0,0 +1,11 @@ +==================================================== + celery.app.control +==================================================== + +.. contents:: + :local: +.. currentmodule:: celery.app.control + +.. automodule:: celery.app.control + :members: + :undoc-members: diff --git a/docs/reference/celery.app.defaults.rst b/docs/reference/celery.app.defaults.rst new file mode 100644 index 0000000..ec1fb16 --- /dev/null +++ b/docs/reference/celery.app.defaults.rst @@ -0,0 +1,11 @@ +=============================================================== + celery.app.defaults +=============================================================== + +.. contents:: + :local: +.. currentmodule:: celery.app.defaults + +.. automodule:: celery.app.defaults + :members: + :undoc-members: diff --git a/docs/reference/celery.app.log.rst b/docs/reference/celery.app.log.rst new file mode 100644 index 0000000..7c4773b --- /dev/null +++ b/docs/reference/celery.app.log.rst @@ -0,0 +1,11 @@ +================================ + celery.app.log +================================ + +.. contents:: + :local: +.. currentmodule:: celery.app.log + +.. automodule:: celery.app.log + :members: + :undoc-members: diff --git a/docs/reference/celery.app.registry.rst b/docs/reference/celery.app.registry.rst new file mode 100644 index 0000000..f70095f --- /dev/null +++ b/docs/reference/celery.app.registry.rst @@ -0,0 +1,11 @@ +================================ + celery.app.registry +================================ + +.. contents:: + :local: +.. currentmodule:: celery.app.registry + +.. automodule:: celery.app.registry + :members: + :undoc-members: diff --git a/docs/reference/celery.app.rst b/docs/reference/celery.app.rst new file mode 100644 index 0000000..4d71491 --- /dev/null +++ b/docs/reference/celery.app.rst @@ -0,0 +1,26 @@ +.. currentmodule:: celery.app + +.. automodule:: celery.app + + .. contents:: + :local: + + Proxies + ------- + + .. autodata:: default_app + + + Functions + --------- + + .. autofunction:: app_or_default + .. autofunction:: enable_trace + .. autofunction:: disable_trace + + + Data + ---- + + .. 
autodata:: default_loader + diff --git a/docs/reference/celery.app.task.rst b/docs/reference/celery.app.task.rst new file mode 100644 index 0000000..9933f28 --- /dev/null +++ b/docs/reference/celery.app.task.rst @@ -0,0 +1,10 @@ +=================================== + celery.app.task +=================================== + +.. contents:: + :local: +.. currentmodule:: celery.app.task + +.. automodule:: celery.app.task + :members: Task, Context, TaskType diff --git a/docs/reference/celery.app.utils.rst b/docs/reference/celery.app.utils.rst new file mode 100644 index 0000000..a60a80f --- /dev/null +++ b/docs/reference/celery.app.utils.rst @@ -0,0 +1,11 @@ +================================ + celery.app.utils +================================ + +.. contents:: + :local: +.. currentmodule:: celery.app.utils + +.. automodule:: celery.app.utils + :members: + :undoc-members: diff --git a/docs/reference/celery.apps.beat.rst b/docs/reference/celery.apps.beat.rst new file mode 100644 index 0000000..7638665 --- /dev/null +++ b/docs/reference/celery.apps.beat.rst @@ -0,0 +1,11 @@ +================================================= + celery.apps.beat +================================================= + +.. contents:: + :local: +.. currentmodule:: celery.apps.beat + +.. automodule:: celery.apps.beat + :members: + :undoc-members: diff --git a/docs/reference/celery.apps.worker.rst b/docs/reference/celery.apps.worker.rst new file mode 100644 index 0000000..4907687 --- /dev/null +++ b/docs/reference/celery.apps.worker.rst @@ -0,0 +1,11 @@ +======================================= + celery.apps.worker +======================================= + +.. contents:: + :local: +.. currentmodule:: celery.apps.worker + +.. automodule:: celery.apps.worker + :members: + :undoc-members: diff --git a/docs/reference/celery.beat.rst b/docs/reference/celery.beat.rst new file mode 100644 index 0000000..b9bd272 --- /dev/null +++ b/docs/reference/celery.beat.rst @@ -0,0 +1,11 @@ +======================================== + celery.beat +======================================== + +.. contents:: + :local: +.. currentmodule:: celery.beat + +.. automodule:: celery.beat + :members: + :undoc-members: diff --git a/docs/reference/celery.bin.amqp.rst b/docs/reference/celery.bin.amqp.rst new file mode 100644 index 0000000..dfc4b75 --- /dev/null +++ b/docs/reference/celery.bin.amqp.rst @@ -0,0 +1,11 @@ +=========================================================== + celery.bin.amqp +=========================================================== + +.. contents:: + :local: +.. currentmodule:: celery.bin.amqp + +.. automodule:: celery.bin.amqp + :members: + :undoc-members: diff --git a/docs/reference/celery.bin.base.rst b/docs/reference/celery.bin.base.rst new file mode 100644 index 0000000..3766a61 --- /dev/null +++ b/docs/reference/celery.bin.base.rst @@ -0,0 +1,11 @@ +================================ + celery.bin.base +================================ + +.. contents:: + :local: +.. currentmodule:: celery.bin.base + +.. automodule:: celery.bin.base + :members: + :undoc-members: diff --git a/docs/reference/celery.bin.beat.rst b/docs/reference/celery.bin.beat.rst new file mode 100644 index 0000000..9675e0d --- /dev/null +++ b/docs/reference/celery.bin.beat.rst @@ -0,0 +1,11 @@ +=================================================== + celery.bin.beat +=================================================== + +.. contents:: + :local: +.. currentmodule:: celery.bin.beat + +.. 
automodule:: celery.bin.beat + :members: + :undoc-members: diff --git a/docs/reference/celery.bin.celery.rst b/docs/reference/celery.bin.celery.rst new file mode 100644 index 0000000..c65d125 --- /dev/null +++ b/docs/reference/celery.bin.celery.rst @@ -0,0 +1,11 @@ +========================================== + celery.bin.celery +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.bin.celery + +.. automodule:: celery.bin.celery + :members: + :undoc-members: diff --git a/docs/reference/celery.bin.events.rst b/docs/reference/celery.bin.events.rst new file mode 100644 index 0000000..eb08681 --- /dev/null +++ b/docs/reference/celery.bin.events.rst @@ -0,0 +1,11 @@ +===================================================== + celery.bin.events +===================================================== + +.. contents:: + :local: +.. currentmodule:: celery.bin.events + +.. automodule:: celery.bin.events + :members: + :undoc-members: diff --git a/docs/reference/celery.bin.graph.rst b/docs/reference/celery.bin.graph.rst new file mode 100644 index 0000000..3a5ee50 --- /dev/null +++ b/docs/reference/celery.bin.graph.rst @@ -0,0 +1,11 @@ +===================================================== + celery.bin.graph +===================================================== + +.. contents:: + :local: +.. currentmodule:: celery.bin.graph + +.. automodule:: celery.bin.graph + :members: + :undoc-members: diff --git a/docs/reference/celery.bin.multi.rst b/docs/reference/celery.bin.multi.rst new file mode 100644 index 0000000..bf20c27 --- /dev/null +++ b/docs/reference/celery.bin.multi.rst @@ -0,0 +1,11 @@ +=============================================== + celery.bin.multi +=============================================== + +.. contents:: + :local: +.. currentmodule:: celery.bin.multi + +.. automodule:: celery.bin.multi + :members: + :undoc-members: diff --git a/docs/reference/celery.bin.worker.rst b/docs/reference/celery.bin.worker.rst new file mode 100644 index 0000000..273cb0b --- /dev/null +++ b/docs/reference/celery.bin.worker.rst @@ -0,0 +1,11 @@ +========================================== + celery.bin.worker +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.bin.worker + +.. automodule:: celery.bin.worker + :members: + :undoc-members: diff --git a/docs/reference/celery.bootsteps.rst b/docs/reference/celery.bootsteps.rst new file mode 100644 index 0000000..73d4aa3 --- /dev/null +++ b/docs/reference/celery.bootsteps.rst @@ -0,0 +1,11 @@ +========================================== + celery.bootsteps +========================================== + +.. contents:: + :local: +.. currentmodule:: celery.bootsteps + +.. automodule:: celery.bootsteps + :members: + :undoc-members: diff --git a/docs/reference/celery.contrib.abortable.rst b/docs/reference/celery.contrib.abortable.rst new file mode 100644 index 0000000..24eef2a --- /dev/null +++ b/docs/reference/celery.contrib.abortable.rst @@ -0,0 +1,12 @@ +======================================================= + celery.contrib.abortable +======================================================= + +.. contents:: + :local: + +.. currentmodule:: celery.contrib.abortable + +.. automodule:: celery.contrib.abortable + :members: + :undoc-members: diff --git a/docs/reference/celery.contrib.batches.rst b/docs/reference/celery.contrib.batches.rst new file mode 100644 index 0000000..4f63924 --- /dev/null +++ b/docs/reference/celery.contrib.batches.rst @@ -0,0 +1,12 @@ +.. 
currentmodule:: celery.contrib.batches + +.. automodule:: celery.contrib.batches + + **API** + + .. autoclass:: Batches + :members: + :undoc-members: + .. autoclass:: SimpleRequest + :members: + :undoc-members: diff --git a/docs/reference/celery.contrib.methods.rst b/docs/reference/celery.contrib.methods.rst new file mode 100644 index 0000000..539234e --- /dev/null +++ b/docs/reference/celery.contrib.methods.rst @@ -0,0 +1,5 @@ +.. currentmodule:: celery.contrib.methods + +.. automodule:: celery.contrib.methods + :members: + :undoc-members: diff --git a/docs/reference/celery.contrib.migrate.rst b/docs/reference/celery.contrib.migrate.rst new file mode 100644 index 0000000..ce0c91a --- /dev/null +++ b/docs/reference/celery.contrib.migrate.rst @@ -0,0 +1,12 @@ +======================== + celery.contrib.migrate +======================== + +.. contents:: + :local: + +.. currentmodule:: celery.contrib.migrate + +.. automodule:: celery.contrib.migrate + :members: + :undoc-members: diff --git a/docs/reference/celery.contrib.rdb.rst b/docs/reference/celery.contrib.rdb.rst new file mode 100644 index 0000000..8818c43 --- /dev/null +++ b/docs/reference/celery.contrib.rdb.rst @@ -0,0 +1,7 @@ +.. currentmodule:: celery.contrib.rdb + +.. automodule:: celery.contrib.rdb + + .. autofunction:: set_trace + .. autofunction:: debugger + .. autoclass:: Rdb diff --git a/docs/reference/celery.contrib.sphinx.rst b/docs/reference/celery.contrib.sphinx.rst new file mode 100644 index 0000000..9bb0d3e --- /dev/null +++ b/docs/reference/celery.contrib.sphinx.rst @@ -0,0 +1,4 @@ +.. currentmodule:: celery.contrib.sphinx + +.. automodule:: celery.contrib.sphinx + :members: diff --git a/docs/reference/celery.events.rst b/docs/reference/celery.events.rst new file mode 100644 index 0000000..2ce8b1b --- /dev/null +++ b/docs/reference/celery.events.rst @@ -0,0 +1,11 @@ +======================== + celery.events +======================== + +.. contents:: + :local: +.. currentmodule:: celery.events + +.. automodule:: celery.events + :members: + :undoc-members: diff --git a/docs/reference/celery.events.state.rst b/docs/reference/celery.events.state.rst new file mode 100644 index 0000000..0943deb --- /dev/null +++ b/docs/reference/celery.events.state.rst @@ -0,0 +1,11 @@ +================================================================= + celery.events.state +================================================================= + +.. contents:: + :local: +.. currentmodule:: celery.events.state + +.. automodule:: celery.events.state + :members: + :undoc-members: diff --git a/docs/reference/celery.exceptions.rst b/docs/reference/celery.exceptions.rst new file mode 100644 index 0000000..fb8eee0 --- /dev/null +++ b/docs/reference/celery.exceptions.rst @@ -0,0 +1,11 @@ +================================ + celery.exceptions +================================ + +.. contents:: + :local: +.. currentmodule:: celery.exceptions + +.. automodule:: celery.exceptions + :members: + :undoc-members: diff --git a/docs/reference/celery.loaders.app.rst b/docs/reference/celery.loaders.app.rst new file mode 100644 index 0000000..8d7c17f --- /dev/null +++ b/docs/reference/celery.loaders.app.rst @@ -0,0 +1,11 @@ +================================= + celery.loaders.app +================================= + +.. contents:: + :local: +.. currentmodule:: celery.loaders.app + +.. 
automodule:: celery.loaders.app + :members: + :undoc-members: diff --git a/docs/reference/celery.loaders.base.rst b/docs/reference/celery.loaders.base.rst new file mode 100644 index 0000000..4ee8c1b --- /dev/null +++ b/docs/reference/celery.loaders.base.rst @@ -0,0 +1,11 @@ +=========================================== + celery.loaders.base +=========================================== + +.. contents:: + :local: +.. currentmodule:: celery.loaders.base + +.. automodule:: celery.loaders.base + :members: + :undoc-members: diff --git a/docs/reference/celery.loaders.default.rst b/docs/reference/celery.loaders.default.rst new file mode 100644 index 0000000..6210b7e --- /dev/null +++ b/docs/reference/celery.loaders.default.rst @@ -0,0 +1,11 @@ +========================================= + celery.loaders.default +========================================= + +.. contents:: + :local: +.. currentmodule:: celery.loaders.default + +.. automodule:: celery.loaders.default + :members: + :undoc-members: diff --git a/docs/reference/celery.loaders.rst b/docs/reference/celery.loaders.rst new file mode 100644 index 0000000..4804451 --- /dev/null +++ b/docs/reference/celery.loaders.rst @@ -0,0 +1,11 @@ +============================================ + celery.loaders +============================================ + +.. contents:: + :local: +.. currentmodule:: celery.loaders + +.. automodule:: celery.loaders + :members: + :undoc-members: diff --git a/docs/reference/celery.result.rst b/docs/reference/celery.result.rst new file mode 100644 index 0000000..d36c378 --- /dev/null +++ b/docs/reference/celery.result.rst @@ -0,0 +1,11 @@ +============================= + celery.result +============================= + +.. contents:: + :local: +.. currentmodule:: celery.result + +.. automodule:: celery.result + :members: + :undoc-members: diff --git a/docs/reference/celery.rst b/docs/reference/celery.rst new file mode 100644 index 0000000..90710a4 --- /dev/null +++ b/docs/reference/celery.rst @@ -0,0 +1,566 @@ +=========================================== + :mod:`celery` --- Distributed processing +=========================================== + +.. currentmodule:: celery +.. module:: celery + :synopsis: Distributed processing +.. moduleauthor:: Ask Solem +.. sectionauthor:: Ask Solem + +-------------- + +This module is the main entry-point for the Celery API. +It includes commonly needed things for calling tasks, +and creating Celery applications. + +===================== =================================================== +:class:`Celery` celery application instance +:class:`group` group tasks together +:class:`chain` chain tasks together +:class:`chord` chords enable callbacks for groups +:class:`signature` object describing a task invocation +:data:`current_app` proxy to the current application instance +:data:`current_task` proxy to the currently executing task +===================== =================================================== + +:class:`Celery` application objects +----------------------------------- + +.. versionadded:: 2.5 + +.. class:: Celery(main='__main__', broker='amqp://localhost//', …) + + :param main: Name of the main module if running as `__main__`. + This is used as a prefix for task names. + :keyword broker: URL of the default broker used. + :keyword loader: The loader class, or the name of the loader class to use. + Default is :class:`celery.loaders.app.AppLoader`. + :keyword backend: The result store backend class, or the name of the + backend class to use. 
Default is the value of the + :setting:`CELERY_RESULT_BACKEND` setting. + :keyword amqp: AMQP object or class name. + :keyword events: Events object or class name. + :keyword log: Log object or class name. + :keyword control: Control object or class name. + :keyword set_as_current: Make this the global current app. + :keyword tasks: A task registry or the name of a registry class. + :keyword include: List of modules every worker should import. + :keyword fixups: List of fixup plug-ins (see e.g. + :mod:`celery.fixups.django`). + :keyword autofinalize: If set to False a :exc:`RuntimeError` + will be raised if the task registry or tasks are used before + the app is finalized. + + .. attribute:: Celery.main + + Name of the `__main__` module. Required for standalone scripts. + + If set this will be used instead of `__main__` when automatically + generating task names. + + .. attribute:: Celery.conf + + Current configuration. + + .. attribute:: user_options + + Custom options for command-line programs. + See :ref:`extending-commandoptions` + + .. attribute:: steps + + Custom bootsteps to extend and modify the worker. + See :ref:`extending-bootsteps`. + + .. attribute:: Celery.current_task + + The instance of the task that is being executed, or :const:`None`. + + .. attribute:: Celery.amqp + + AMQP related functionality: :class:`~@amqp`. + + .. attribute:: Celery.backend + + Current backend instance. + + .. attribute:: Celery.loader + + Current loader instance. + + .. attribute:: Celery.control + + Remote control: :class:`~@control`. + + .. attribute:: Celery.events + + Consuming and sending events: :class:`~@events`. + + .. attribute:: Celery.log + + Logging: :class:`~@log`. + + .. attribute:: Celery.tasks + + Task registry. + + Accessing this attribute will also finalize the app. + + .. attribute:: Celery.pool + + Broker connection pool: :class:`~@pool`. + This attribute is not related to the workers concurrency pool. + + .. attribute:: Celery.Task + + Base task class for this app. + + .. attribute:: Celery.timezone + + Current timezone for this app. + This is a cached property taking the time zone from the + :setting:`CELERY_TIMEZONE` setting. + + .. method:: Celery.close + + Close any open pool connections and do any other steps necessary + to clean up after the application. + + Only necessary for dynamically created apps for which you can + use the with statement instead:: + + with Celery(set_as_current=False) as app: + with app.connection() as conn: + pass + + .. method:: Celery.signature + + Return a new :class:`~celery.canvas.Signature` bound to this app. + See :meth:`~celery.signature` + + .. method:: Celery.bugreport + + Return a string with information useful for the Celery core + developers when reporting a bug. + + .. method:: Celery.config_from_object(obj, silent=False, force=False) + + Reads configuration from object, where object is either + an object or the name of a module to import. + + :keyword silent: If true then import errors will be ignored. + + :keyword force: Force reading configuration immediately. + By default the configuration will be read only when required. + + .. code-block:: python + + >>> celery.config_from_object("myapp.celeryconfig") + + >>> from myapp import celeryconfig + >>> celery.config_from_object(celeryconfig) + + .. method:: Celery.config_from_envvar(variable_name, + silent=False, force=False) + + Read configuration from environment variable. + + The value of the environment variable must be the name + of a module to import. + + .. 
code-block:: python + + >>> os.environ["CELERY_CONFIG_MODULE"] = "myapp.celeryconfig" + >>> celery.config_from_envvar("CELERY_CONFIG_MODULE") + + .. method:: Celery.autodiscover_tasks(packages, related_name="tasks") + + With a list of packages, try to import modules of a specific name (by + default 'tasks'). + + For example if you have an (imagined) directory tree like this:: + + foo/__init__.py + tasks.py + models.py + + bar/__init__.py + tasks.py + models.py + + baz/__init__.py + models.py + + Then calling ``app.autodiscover_tasks(['foo', bar', 'baz'])`` will + result in the modules ``foo.tasks`` and ``bar.tasks`` being imported. + + :param packages: List of packages to search. + This argument may also be a callable, in which case the + value returned is used (for lazy evaluation). + + :keyword related_name: The name of the module to find. Defaults + to "tasks", which means it look for "module.tasks" for every + module in ``packages``. + :keyword force: By default this call is lazy so that the actual + autodiscovery will not happen until an application imports the + default modules. Forcing will cause the autodiscovery to happen + immediately. + + + .. method:: Celery.add_defaults(d) + + Add default configuration from dict ``d``. + + If the argument is a callable function then it will be regarded + as a promise, and it won't be loaded until the configuration is + actually needed. + + This method can be compared to:: + + >>> celery.conf.update(d) + + with a difference that 1) no copy will be made and 2) the dict will + not be transferred when the worker spawns child processes, so + it's important that the same configuration happens at import time + when pickle restores the object on the other side. + + .. method:: Celery.setup_security(…) + + Setup the message-signing serializer. + This will affect all application instances (a global operation). + + Disables untrusted serializers and if configured to use the ``auth`` + serializer will register the auth serializer with the provided settings + into the Kombu serializer registry. + + :keyword allowed_serializers: List of serializer names, or content_types + that should be exempt from being disabled. + :keyword key: Name of private key file to use. + Defaults to the :setting:`CELERY_SECURITY_KEY` setting. + :keyword cert: Name of certificate file to use. + Defaults to the :setting:`CELERY_SECURITY_CERTIFICATE` setting. + :keyword store: Directory containing certificates. + Defaults to the :setting:`CELERY_SECURITY_CERT_STORE` setting. + :keyword digest: Digest algorithm used when signing messages. + Default is ``sha1``. + :keyword serializer: Serializer used to encode messages after + they have been signed. See :setting:`CELERY_TASK_SERIALIZER` for + the serializers supported. + Default is ``json``. + + .. method:: Celery.start(argv=None) + + Run :program:`celery` using `argv`. + + Uses :data:`sys.argv` if `argv` is not specified. + + .. method:: Celery.task(fun, …) + + Decorator to create a task class out of any callable. + + Examples: + + .. code-block:: python + + @app.task + def refresh_feed(url): + return … + + with setting extra options: + + .. code-block:: python + + @app.task(exchange="feeds") + def refresh_feed(url): + return … + + .. admonition:: App Binding + + For custom apps the task decorator will return a proxy + object, so that the act of creating the task is not performed + until the task is used or the task registry is accessed. 
+ + If you are depending on binding to be deferred, then you must + not access any attributes on the returned object until the + application is fully set up (finalized). + + + .. method:: Celery.send_task(name[, args[, kwargs[, …]]]) + + Send task by name. + + :param name: Name of task to call (e.g. `"tasks.add"`). + :keyword result_cls: Specify custom result class. Default is + using :meth:`AsyncResult`. + + Otherwise supports the same arguments as :meth:`@-Task.apply_async`. + + .. attribute:: Celery.AsyncResult + + Create new result instance. See :class:`~celery.result.AsyncResult`. + + .. attribute:: Celery.GroupResult + + Create new group result instance. + See :class:`~celery.result.GroupResult`. + + .. method:: Celery.worker_main(argv=None) + + Run :program:`celery worker` using `argv`. + + Uses :data:`sys.argv` if `argv` is not specified. + + .. attribute:: Celery.Worker + + Worker application. See :class:`~@Worker`. + + .. attribute:: Celery.WorkController + + Embeddable worker. See :class:`~@WorkController`. + + .. attribute:: Celery.Beat + + Celerybeat scheduler application. + See :class:`~@Beat`. + + .. method:: Celery.connection(url=default, [ssl, [transport_options={}]]) + + Establish a connection to the message broker. + + :param url: Either the URL or the hostname of the broker to use. + + :keyword hostname: URL, Hostname/IP-address of the broker. + If an URL is used, then the other argument below will + be taken from the URL instead. + :keyword userid: Username to authenticate as. + :keyword password: Password to authenticate with + :keyword virtual_host: Virtual host to use (domain). + :keyword port: Port to connect to. + :keyword ssl: Defaults to the :setting:`BROKER_USE_SSL` setting. + :keyword transport: defaults to the :setting:`BROKER_TRANSPORT` + setting. + + :returns :class:`kombu.Connection`: + + .. method:: Celery.connection_or_acquire(connection=None) + + For use within a with-statement to get a connection from the pool + if one is not already provided. + + :keyword connection: If not provided, then a connection will be + acquired from the connection pool. + + .. method:: Celery.producer_or_acquire(producer=None) + + For use within a with-statement to get a producer from the pool + if one is not already provided + + :keyword producer: If not provided, then a producer will be + acquired from the producer pool. + + .. method:: Celery.mail_admins(subject, body, fail_silently=False) + + Sends an email to the admins in the :setting:`ADMINS` setting. + + .. method:: Celery.select_queues(queues=[]) + + Select a subset of queues, where queues must be a list of queue + names to keep. + + .. method:: Celery.now() + + Return the current time and date as a :class:`~datetime.datetime` + object. + + .. method:: Celery.set_current() + + Makes this the current app for this thread. + + .. method:: Celery.finalize() + + Finalizes the app by loading built-in tasks, + and evaluating pending task decorators + + .. method:: Celery.on_configure() + + Optional callback for when the first time the configured is required. + + .. attribute:: Celery.Pickler + + Helper class used to pickle this application. + +Canvas primitives +----------------- + +See :ref:`guide-canvas` for more about creating task workflows. + +.. class:: group(task1[, task2[, task3[,… taskN]]]) + + Creates a group of tasks to be executed in parallel. + + Example:: + + >>> res = group([add.s(2, 2), add.s(4, 4)])() + >>> res.get() + [4, 8] + + A group is lazy so you must call it to take action and evaluate + the group. 
+ + Will return a `group` task that when called will then call all of the + tasks in the group (and return a :class:`GroupResult` instance + that can be used to inspect the state of the group). + +.. class:: chain(task1[, task2[, task3[,… taskN]]]) + + Chains tasks together, so that each tasks follows each other + by being applied as a callback of the previous task. + + If called with only one argument, then that argument must + be an iterable of tasks to chain. + + Example:: + + >>> res = chain(add.s(2, 2), add.s(4))() + + is effectively :math:`(2 + 2) + 4)`:: + + >>> res.get() + 8 + + Calling a chain will return the result of the last task in the chain. + You can get to the other tasks by following the ``result.parent``'s:: + + >>> res.parent.get() + 4 + +.. class:: chord(header[, body]) + + A chord consists of a header and a body. + The header is a group of tasks that must complete before the callback is + called. A chord is essentially a callback for a group of tasks. + + Example:: + + >>> res = chord([add.s(2, 2), add.s(4, 4)])(sum_task.s()) + + is effectively :math:`\Sigma ((2 + 2) + (4 + 4))`:: + + >>> res.get() + 12 + + The body is applied with the return values of all the header + tasks as a list. + +.. class:: signature(task=None, args=(), kwargs={}, options={}) + + Describes the arguments and execution options for a single task invocation. + + Used as the parts in a :class:`group` or to safely pass + tasks around as callbacks. + + Signatures can also be created from tasks:: + + >>> add.subtask(args=(), kwargs={}, options={}) + + or the ``.s()`` shortcut:: + + >>> add.s(*args, **kwargs) + + :param task: Either a task class/instance, or the name of a task. + :keyword args: Positional arguments to apply. + :keyword kwargs: Keyword arguments to apply. + :keyword options: Additional options to :meth:`Task.apply_async`. + + Note that if the first argument is a :class:`dict`, the other + arguments will be ignored and the values in the dict will be used + instead. + + >>> s = signature("tasks.add", args=(2, 2)) + >>> signature(s) + {"task": "tasks.add", args=(2, 2), kwargs={}, options={}} + + .. method:: signature.__call__(*args \*\*kwargs) + + Call the task directly (in the current process). + + .. method:: signature.delay(*args, \*\*kwargs) + + Shortcut to :meth:`apply_async`. + + .. method:: signature.apply_async(args=(), kwargs={}, …) + + Apply this task asynchronously. + + :keyword args: Partial args to be prepended to the existing args. + :keyword kwargs: Partial kwargs to be merged with the existing kwargs. + :keyword options: Partial options to be merged with the existing + options. + + See :meth:`~@Task.apply_async`. + + .. method:: signature.apply(args=(), kwargs={}, …) + + Same as :meth:`apply_async` but executed the task inline instead + of sending a task message. + + .. method:: signature.freeze(_id=None) + + Finalize the signature by adding a concrete task id. + The task will not be called and you should not call the signature + twice after freezing it as that will result in two task messages + using the same task id. + + :returns: :class:`@AsyncResult` instance. + + .. method:: signature.clone(args=(), kwargs={}, …) + + Return a copy of this signature. + + :keyword args: Partial args to be prepended to the existing args. + :keyword kwargs: Partial kwargs to be merged with the existing kwargs. + :keyword options: Partial options to be merged with the existing + options. + + .. 
method:: signature.replace(args=None, kwargs=None, options=None) + + Replace the args, kwargs or options set for this signature. + These are only replaced if the selected is not :const:`None`. + + .. method:: signature.link(other_signature) + + Add a callback task to be applied if this task + executes successfully. + + :returns: ``other_signature`` (to work with :func:`~functools.reduce`). + + .. method:: signature.link_error(other_signature) + + Add a callback task to be applied if an error occurs + while executing this task. + + :returns: ``other_signature`` (to work with :func:`~functools.reduce`) + + .. method:: signature.set(…) + + Set arbitrary options (same as ``.options.update(…)``). + + This is a chaining method call (i.e. it will return ``self``). + + .. method:: signature.flatten_links() + + Gives a recursive list of dependencies (unchain if you will, + but with links intact). + +Proxies +------- + +.. data:: current_app + + The currently set app for this thread. + +.. data:: current_task + + The task currently being executed + (only set in the worker, or when eager/apply is used). diff --git a/docs/reference/celery.schedules.rst b/docs/reference/celery.schedules.rst new file mode 100644 index 0000000..f1afd73 --- /dev/null +++ b/docs/reference/celery.schedules.rst @@ -0,0 +1,11 @@ +===================================================== + celery.schedules +===================================================== + +.. contents:: + :local: +.. currentmodule:: celery.schedules + +.. automodule:: celery.schedules + :members: + :undoc-members: diff --git a/docs/reference/celery.security.rst b/docs/reference/celery.security.rst new file mode 100644 index 0000000..8b87c67 --- /dev/null +++ b/docs/reference/celery.security.rst @@ -0,0 +1,11 @@ +======================== + celery.security +======================== + +.. contents:: + :local: +.. currentmodule:: celery.security + +.. automodule:: celery.security + :members: + :undoc-members: diff --git a/docs/reference/celery.signals.rst b/docs/reference/celery.signals.rst new file mode 100644 index 0000000..8ea6f36 --- /dev/null +++ b/docs/reference/celery.signals.rst @@ -0,0 +1,11 @@ +====================================================== + celery.signals +====================================================== + +.. contents:: + :local: +.. currentmodule:: celery.signals + +.. automodule:: celery.signals + :members: + :undoc-members: diff --git a/docs/reference/celery.states.rst b/docs/reference/celery.states.rst new file mode 100644 index 0000000..ee89c58 --- /dev/null +++ b/docs/reference/celery.states.rst @@ -0,0 +1,8 @@ +.. currentmodule:: celery.states + +.. contents:: + :local: + +.. automodule:: celery.states + :members: + diff --git a/docs/reference/celery.task.http.rst b/docs/reference/celery.task.http.rst new file mode 100644 index 0000000..6f8f051 --- /dev/null +++ b/docs/reference/celery.task.http.rst @@ -0,0 +1,11 @@ +======================================== + celery.task.http +======================================== + +.. contents:: + :local: +.. currentmodule:: celery.task.http + +.. automodule:: celery.task.http + :members: + :undoc-members: diff --git a/docs/reference/celery.utils.debug.rst b/docs/reference/celery.utils.debug.rst new file mode 100644 index 0000000..07e2115 --- /dev/null +++ b/docs/reference/celery.utils.debug.rst @@ -0,0 +1,48 @@ +==================================== + celery.utils.debug +==================================== + +.. 
contents:: + :local: + +Sampling Memory Usage +===================== + +This module can be used to diagnose and sample the memory usage +used by parts of your application. + +E.g to sample the memory usage of calling tasks you can do this: + +.. code-block:: python + + + from celery.utils.debug import sample_mem, memdump + + from tasks import add + + + try: + for i in range(100): + for j in range(100): + add.delay(i, j) + sample_mem() + finally: + memdump() + + +API Reference +============= + +.. currentmodule:: celery.utils.debug + +.. automodule:: celery.utils.debug + + .. autofunction:: sample_mem + + .. autofunction:: memdump + + .. autofunction:: sample + + .. autofunction:: mem_rss + + .. autofunction:: ps diff --git a/docs/reference/celery.utils.mail.rst b/docs/reference/celery.utils.mail.rst new file mode 100644 index 0000000..ac7a41f --- /dev/null +++ b/docs/reference/celery.utils.mail.rst @@ -0,0 +1,11 @@ +==================================== + celery.utils.mail +==================================== + +.. contents:: + :local: +.. currentmodule:: celery.utils.mail + +.. automodule:: celery.utils.mail + :members: + :undoc-members: diff --git a/docs/reference/celery.worker.consumer.rst b/docs/reference/celery.worker.consumer.rst new file mode 100644 index 0000000..36b8812 --- /dev/null +++ b/docs/reference/celery.worker.consumer.rst @@ -0,0 +1,11 @@ +================================================== + celery.worker.consumer +================================================== + +.. contents:: + :local: +.. currentmodule:: celery.worker.consumer + +.. automodule:: celery.worker.consumer + :members: + :undoc-members: diff --git a/docs/reference/celery.worker.job.rst b/docs/reference/celery.worker.job.rst new file mode 100644 index 0000000..36fc1a7 --- /dev/null +++ b/docs/reference/celery.worker.job.rst @@ -0,0 +1,11 @@ +===================================== + celery.worker.job +===================================== + +.. contents:: + :local: +.. currentmodule:: celery.worker.job + +.. automodule:: celery.worker.job + :members: + :undoc-members: diff --git a/docs/reference/celery.worker.rst b/docs/reference/celery.worker.rst new file mode 100644 index 0000000..8562c69 --- /dev/null +++ b/docs/reference/celery.worker.rst @@ -0,0 +1,11 @@ +======================================== + celery.worker +======================================== + +.. contents:: + :local: +.. currentmodule:: celery.worker + +.. automodule:: celery.worker + :members: + :undoc-members: diff --git a/docs/reference/celery.worker.state.rst b/docs/reference/celery.worker.state.rst new file mode 100644 index 0000000..31ba74c --- /dev/null +++ b/docs/reference/celery.worker.state.rst @@ -0,0 +1,11 @@ +==================================== + celery.worker.state +==================================== + +.. contents:: + :local: +.. currentmodule:: celery.worker.state + +.. automodule:: celery.worker.state + :members: + :undoc-members: diff --git a/docs/reference/celery.worker.strategy.rst b/docs/reference/celery.worker.strategy.rst new file mode 100644 index 0000000..848cef2 --- /dev/null +++ b/docs/reference/celery.worker.strategy.rst @@ -0,0 +1,11 @@ +==================================== + celery.worker.strategy +==================================== + +.. contents:: + :local: +.. currentmodule:: celery.worker.strategy + +.. 
automodule:: celery.worker.strategy + :members: + :undoc-members: diff --git a/docs/reference/index.rst b/docs/reference/index.rst new file mode 100644 index 0000000..5f1c72a --- /dev/null +++ b/docs/reference/index.rst @@ -0,0 +1,60 @@ +.. _apiref: + +=============== + API Reference +=============== + +:Release: |version| +:Date: |today| + +.. toctree:: + :maxdepth: 1 + + celery + celery.app + celery.app.task + celery.app.amqp + celery.app.defaults + celery.app.control + celery.app.registry + celery.app.builtins + celery.app.log + celery.app.utils + celery.bootsteps + celery.result + celery.task.http + celery.schedules + celery.signals + celery.security + celery.utils.debug + celery.utils.mail + celery.exceptions + celery.loaders + celery.loaders.app + celery.loaders.default + celery.loaders.base + celery.states + celery.contrib.abortable + celery.contrib.batches + celery.contrib.migrate + celery.contrib.sphinx + celery.contrib.rdb + celery.contrib.methods + celery.events + celery.events.state + celery.beat + celery.apps.worker + celery.apps.beat + celery.worker + celery.worker.consumer + celery.worker.job + celery.worker.state + celery.worker.strategy + celery.bin.base + celery.bin.celery + celery.bin.worker + celery.bin.beat + celery.bin.events + celery.bin.amqp + celery.bin.multi + celery.bin.graph diff --git a/docs/sec/CELERYSA-0001.txt b/docs/sec/CELERYSA-0001.txt new file mode 100644 index 0000000..678f544 --- /dev/null +++ b/docs/sec/CELERYSA-0001.txt @@ -0,0 +1,93 @@ +========================================= + CELERYSA-0001: Celery Security Advisory +========================================= +:contact: security@celeryproject.org +:author: Ask Solem +:CVE id: CVE-2011-4356 +:date: 2011-11-25 04:35:00 P.M GMT + +Details +======= + +:package: celery +:vulnerability: privilege escalation +:problem type: local +:risk: medium +:bug-no: Celery #544 +:versions-affected: 2.1, 2.2, 2.3, 2.4 + +Description +=========== + +The --uid and --gid arguments to the celeryd-multi, +celeryd_detach, celerybeat and celeryev programs shipped +with Celery versions 2.1 and later was not handled properly: +only the effective user was changed, with the real id remaining +unchanged. + +In practice for affected users the vulnerability means that malicious code +loaded in the worker process would be allowed to escalate privileges. + +We take this issue seriously since the Pickle serializer used by +default makes it possible to execute arbitrary code. + +We recommend that users takes steps to secure their systems so that +malicious users cannot abuse the message broker to send messages, +or disable the pickle serializer used in Celery so that arbitrary code +execution is not possible. + +Patches are now available for all maintained versions (see below), +and users are urged to upgrade, even if not directly +affected. + +Systems affected +================ + +Users of Celery versions 2.1, 2.2, 2.3, 2.4 except the recently +released 2.2.8, 2.3.4 and 2.4.4, daemonizing the celery programs +as the root user, using either: + 1) the --uid or --gid arguments, or + 2) the provided generic init scripts with the environment variables + CELERYD_USER or CELERYD_GROUP defined, +are affected. + +Users using the Debian init scripts, CentOS init scripts, OS X launchctl +scripts, Supervisor, or users not starting the programs as the root user +are *not* affected. 
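+Illustration
+============
+
+As a rough sketch of the underlying problem (illustrative only, not part of
+the original advisory text): dropping only the effective UID leaves the real
+and saved UIDs at 0, so any code running inside the worker process can switch
+back to root with nothing but the standard library:
+
+    import os
+
+    # Assume the process was started as root and then "dropped" privileges
+    # the way the affected versions did, i.e. by changing the effective UID only.
+    os.seteuid(1000)      # the real UID and saved set-user-ID remain 0
+
+    # Malicious code loaded into the worker can undo the drop at any time,
+    # because restoring an effective UID of 0 is permitted while the saved
+    # set-user-ID is still 0.
+    os.seteuid(0)
+    print(os.geteuid())   # 0 -- running as root again
+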
+ +Solution +======== + +Users of the 2.4 series should upgrade to 2.4.4: + + * ``pip install -U celery``, or + * ``easy_install -U celery``, or + * http://pypi.python.org/pypi/celery/2.4.4 + +Users of the 2.3 series should upgrade to 2.3.4: + + * ``pip install -U celery==2.3.4``, or + * ``easy_install -U celery==2.3.4``, or + * http://pypi.python.org/pypi/celery/2.3.4 + +Users of the 2.2 series should upgrade to 2.2.8: + + * ``pip install -U celery==2.2.8``, or + * ``easy_install -U celery==2.2.8``, or + * http://pypi.python.org/pypi/celery/2.2.8 + +The 2.1 series is no longer being maintained, so we urge users +of that series to upgrade to a more recent version. + +Distribution package maintainers are urged to provide their users +with updated packages. + + +Please direct questions to the celery-users mailing-list: +http://groups.google.com/group/celery-users/, + +or if you are planning to report a security issue we request that +you keep the information confidential by contacting +security@celeryproject.org, so that a fix can be issued as quickly as possible. + +Thank you! diff --git a/docs/sec/CELERYSA-0002.txt b/docs/sec/CELERYSA-0002.txt new file mode 100644 index 0000000..dd600b0 --- /dev/null +++ b/docs/sec/CELERYSA-0002.txt @@ -0,0 +1,90 @@ +========================================= + CELERYSA-0002: Celery Security Advisory +========================================= +:contact: security@celeryproject.org +:CVE id: TBA +:date: 2014-07-10 05:00:00 P.M UTC + +Details +======= + +:package: celery +:vulnerability: Environment error +:problem type: local +:risk: low +:versions-affected: 2.5, 3.0, 3.1 + +Description +=========== + +The built-in utility used to daemonize the Celery worker service sets +an insecure umask by default (umask 0). + +This means that any files or directories created by the worker will +end up having world-writable permissions. + +In practice this means that local users will be able to modify and possibly +corrupt the files created by user tasks. + +This is not immediately exploitable but can be if those files are later +evaluated as a program, for example a task that creates Python program files +that are later executed. + +Patches are now available for all maintained versions (see below), +and users are urged to upgrade, even if not directly +affected. + +Acknowledgements +================ + +Special thanks to Red Hat for originally discovering and reporting the issue. + +Systems affected +================ + +Users of Celery versions 3.0, and 3.1, except the recently +released 3.1.13, are affected if daemonizing the +Celery programs using the `--detach` argument or using the `celery multi` program +to start workers in the background, without setting a custom `--umask` +argument. + +Solution +======== + +NOTE: + Not all users of Celery will use it to create files, but if you do + then files may already have been created with insecure permissions. + + So after upgrading, or using the workaround, then please make sure + that files already created are not world writable. 
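+As a quick sketch of why the umask matters here (illustrative only, not part
+of the original advisory text): the umask bits are cleared from the mode a
+file is created with, so a umask of 0 leaves the group- and world-writable
+bits in place, while a umask of 022 removes them (the file names below are
+made up for the example):
+
+    import os
+
+    os.umask(0)            # what the affected versions did
+    fd = os.open("task-output-a", os.O_CREAT | os.O_WRONLY, 0o666)
+    os.close(fd)           # resulting permissions: 0666 (rw-rw-rw-), world writable
+
+    os.umask(0o022)        # a conservative default
+    fd = os.open("task-output-b", os.O_CREAT | os.O_WRONLY, 0o666)
+    os.close(fd)           # resulting permissions: 0644 (rw-r--r--)
+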
+ +To work around the issue you can set a custom umask using the ``--umask`` +argument: + + $ celery worker -l info --detach --umask=16 # (022) + +Or you can upgrade to a more recent version: + +- Users of the 3.1 series should upgrade to 3.1.13: + + * ``pip install -U celery``, or + * ``easy_install -U celery``, or + * http://pypi.python.org/pypi/celery/3.1.13 + +- Users of the 3.0 series should upgrade to 3.0.25: + + * ``pip install -U celery==3.0.25``, or + * ``easy_install -U celery==3.0.25``, or + * http://pypi.python.org/pypi/celery/3.0.25 + +Distribution package maintainers are urged to provide their users +with updated packages. + +Please direct questions to the celery-users mailing-list: +http://groups.google.com/group/celery-users/, + +or if you are planning to report a new security related issue we request that +you keep the information confidential by contacting +security@celeryproject.org instead. + +Thank you! diff --git a/docs/templates/readme.txt b/docs/templates/readme.txt new file mode 100644 index 0000000..3ba1063 --- /dev/null +++ b/docs/templates/readme.txt @@ -0,0 +1,16 @@ +================================= + celery - Distributed Task Queue +================================= + +.. image:: http://cloud.github.com/downloads/celery/celery/celery_128.png + +.. include:: ../includes/introduction.txt + +.. include:: ../includes/installation.txt + +.. include:: ../includes/resources.txt + + +.. image:: https://d2weczhvl823v0.cloudfront.net/celery/celery/trend.png + :alt: Bitdeli badge + :target: https://bitdeli.com/free diff --git a/docs/tutorials/daemonizing.rst b/docs/tutorials/daemonizing.rst new file mode 100644 index 0000000..4fbfcbf --- /dev/null +++ b/docs/tutorials/daemonizing.rst @@ -0,0 +1,422 @@ +.. _daemonizing: + +================================ + Running the worker as a daemon +================================ + +Celery does not daemonize itself, please use one of the following +daemonization tools. + +.. contents:: + :local: + + +.. _daemon-generic: + +Generic init scripts +==================== + +See the `extra/generic-init.d/`_ directory Celery distribution. + +This directory contains generic bash init scripts for the +:program:`celery worker` program, +these should run on Linux, FreeBSD, OpenBSD, and other Unix-like platforms. + +.. _`extra/generic-init.d/`: + http://github.com/celery/celery/tree/3.1/extra/generic-init.d/ + +.. _generic-initd-celeryd: + +Init script: celeryd +-------------------- + +:Usage: `/etc/init.d/celeryd {start|stop|restart|status}` +:Configuration file: /etc/default/celeryd + +To configure this script to run the worker properly you probably need to at least +tell it where to change +directory to when it starts (to find the module containing your app, or your +configuration module). + +The daemonization script is configured by the file ``/etc/default/celeryd``, +which is a shell (sh) script. You can add environment variables and the +configuration options below to this file. To add environment variables you +must also export them (e.g. ``export DISPLAY=":0"``) + +.. Admonition:: Superuser privileges required + + The init scripts can only be used by root, + and the shell configuration file must also be owned by root. + + Unprivileged users do not need to use the init script, + instead they can use the :program:`celery multi` utility (or + :program:`celery worker --detach`): + + .. 
code-block:: bash + + $ celery multi start worker1 \ + -A proj \ + --pidfile="$HOME/run/celery/%n.pid" \ + --logfile="$HOME/log/celery/%n.log" + + $ celery multi restart worker1 \ + -A proj \ + --logfile="$HOME/log/celery/%n%I.log" \ + --pidfile="$HOME/run/celery/%n.pid + + $ celery multi stopwait worker1 --pidfile="$HOME/run/celery/%n.pid" + +.. _generic-initd-celeryd-example: + +Example configuration +~~~~~~~~~~~~~~~~~~~~~ + +This is an example configuration for a Python project. + +:file:`/etc/default/celeryd`: + +.. code-block:: bash + + # Names of nodes to start + # most will only start one node: + CELERYD_NODES="worker1" + # but you can also start multiple and configure settings + # for each in CELERYD_OPTS (see `celery multi --help` for examples). + CELERYD_NODES="worker1 worker2 worker3" + + # Absolute or relative path to the 'celery' command: + CELERY_BIN="/usr/local/bin/celery" + #CELERY_BIN="/virtualenvs/def/bin/celery" + + # App instance to use + # comment out this line if you don't use an app + CELERY_APP="proj" + # or fully qualified: + #CELERY_APP="proj.tasks:app" + + # Where to chdir at start. + CELERYD_CHDIR="/opt/Myproject/" + + # Extra command-line arguments to the worker + CELERYD_OPTS="--time-limit=300 --concurrency=8" + + # %N will be replaced with the first part of the nodename. + CELERYD_LOG_FILE="/var/log/celery/%N.log" + CELERYD_PID_FILE="/var/run/celery/%N.pid" + + # Workers should run as an unprivileged user. + # You need to create this user manually (or you can choose + # a user/group combination that already exists, e.g. nobody). + CELERYD_USER="celery" + CELERYD_GROUP="celery" + + # If enabled pid and log directories will be created if missing, + # and owned by the userid/group configured. + CELERY_CREATE_DIRS=1 + +.. _generic-initd-celeryd-django-example: + +Example Django configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Django users now uses the exact same template as above, +but make sure that the module that defines your Celery app instance +also sets a default value for :envvar:`DJANGO_SETTINGS_MODULE` +as shown in the example Django project in :ref:`django-first-steps`. + +.. _generic-initd-celeryd-options: + +Available options +~~~~~~~~~~~~~~~~~~ + +* CELERY_APP + App instance to use (value for ``--app`` argument). + If you're still using the old API, or django-celery, then you + can omit this setting. + +* CELERY_BIN + Absolute or relative path to the :program:`celery` program. + Examples: + + * :file:`celery` + * :file:`/usr/local/bin/celery` + * :file:`/virtualenvs/proj/bin/celery` + * :file:`/virtualenvs/proj/bin/python -m celery` + +* CELERYD_NODES + List of node names to start (separated by space). + +* CELERYD_OPTS + Additional command-line arguments for the worker, see + `celery worker --help` for a list. This also supports the extended + syntax used by `multi` to configure settings for individual nodes. + See `celery multi --help` for some multi-node configuration examples. + +* CELERYD_CHDIR + Path to change directory to at start. Default is to stay in the current + directory. + +* CELERYD_PID_FILE + Full path to the PID file. Default is /var/run/celery/%N.pid + +* CELERYD_LOG_FILE + Full path to the worker log file. Default is /var/log/celery/%N.log + +* CELERYD_LOG_LEVEL + Worker log level. Default is INFO. + +* CELERYD_USER + User to run the worker as. Default is current user. + +* CELERYD_GROUP + Group to run worker as. Default is current user. + +* CELERY_CREATE_DIRS + Always create directories (log directory and pid file directory). 
+ Default is to only create directories when no custom logfile/pidfile set. + +* CELERY_CREATE_RUNDIR + Always create pidfile directory. By default only enabled when no custom + pidfile location set. + +* CELERY_CREATE_LOGDIR + Always create logfile directory. By default only enable when no custom + logfile location set. + +.. _generic-initd-celerybeat: + +Init script: celerybeat +----------------------- +:Usage: `/etc/init.d/celerybeat {start|stop|restart}` +:Configuration file: /etc/default/celerybeat or /etc/default/celeryd + +.. _generic-initd-celerybeat-example: + +Example configuration +~~~~~~~~~~~~~~~~~~~~~ + +This is an example configuration for a Python project: + +`/etc/default/celerybeat`: + +.. code-block:: bash + + # Absolute or relative path to the 'celery' command: + CELERY_BIN="/usr/local/bin/celery" + #CELERY_BIN="/virtualenvs/def/bin/celery" + + # App instance to use + # comment out this line if you don't use an app + CELERY_APP="proj" + # or fully qualified: + #CELERY_APP="proj.tasks:app" + + # Where to chdir at start. + CELERYBEAT_CHDIR="/opt/Myproject/" + + # Extra arguments to celerybeat + CELERYBEAT_OPTS="--schedule=/var/run/celery/celerybeat-schedule" + +.. _generic-initd-celerybeat-django-example: + +Example Django configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You should use the same template as above, but make sure the +``DJANGO_SETTINGS_MODULE`` variable is set (and exported), and that +``CELERYD_CHDIR`` is set to the projects directory: + +.. code-block:: bash + + export DJANGO_SETTINGS_MODULE="settings" + + CELERYD_CHDIR="/opt/MyProject" +.. _generic-initd-celerybeat-options: + +Available options +~~~~~~~~~~~~~~~~~ + +* CELERY_APP + App instance to use (value for ``--app`` argument). + +* CELERYBEAT_OPTS + Additional arguments to celerybeat, see `celerybeat --help` for a + list. + +* CELERYBEAT_PID_FILE + Full path to the PID file. Default is /var/run/celeryd.pid. + +* CELERYBEAT_LOG_FILE + Full path to the celeryd log file. Default is /var/log/celeryd.log + +* CELERYBEAT_LOG_LEVEL + Log level to use for celeryd. Default is INFO. + +* CELERYBEAT_USER + User to run beat as. Default is current user. + +* CELERYBEAT_GROUP + Group to run beat as. Default is current user. + +* CELERY_CREATE_DIRS + Always create directories (log directory and pid file directory). + Default is to only create directories when no custom logfile/pidfile set. + +* CELERY_CREATE_RUNDIR + Always create pidfile directory. By default only enabled when no custom + pidfile location set. + +* CELERY_CREATE_LOGDIR + Always create logfile directory. By default only enable when no custom + logfile location set. + +.. _daemon-systemd-generic: + +Usage systemd +============= + +.. _generic-systemd-celery: + +Service file: celery.service +---------------------------- + +:Usage: `systemctl {start|stop|restart|status} celery.service` +:Configuration file: /etc/conf.d/celery + +To create a temporary folders for the log and pid files change user and group in +/usr/lib/tmpfiles.d/celery.conf. +To configure user, group, chdir change settings User, Group and WorkingDirectory defines +in /usr/lib/systemd/system/celery.service. + +.. _generic-systemd-celery-example: + +Example configuration +~~~~~~~~~~~~~~~~~~~~~ + +This is an example configuration for a Python project: + +:file:`/etc/conf.d/celery`: + +.. 
code-block:: bash + + # Name of nodes to start + # here we have a single node + CELERYD_NODES="w1" + # or we could have three nodes: + #CELERYD_NODES="w1 w2 w3" + + # Absolute or relative path to the 'celery' command: + CELERY_BIN="/usr/local/bin/celery" + #CELERY_BIN="/virtualenvs/def/bin/celery" + + # How to call manage.py + CELERYD_MULTI="multi" + + # Extra command-line arguments to the worker + CELERYD_OPTS="--time-limit=300 --concurrency=8" + + # %N will be replaced with the first part of the nodename. + CELERYD_LOG_FILE="/var/log/celery/%N.log" + CELERYD_PID_FILE="/var/run/celery/%N.pid" + +.. _generic-systemd-celeryd-django-example: + +Example Django configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is an example configuration for those using `django-celery`: + +.. code-block:: bash + + # Name of nodes to start + # here we have a single node + CELERYD_NODES="w1" + # or we could have three nodes: + #CELERYD_NODES="w1 w2 w3" + + # Absolute path to "manage.py" + CELERY_BIN="/opt/Myproject/manage.py" + + # How to call manage.py + CELERYD_MULTI="celery multi" + + # Extra command-line arguments to the worker + CELERYD_OPTS="--time-limit=300 --concurrency=8" + + # %N will be replaced with the first part of the nodename. + CELERYD_LOG_FILE="/var/log/celery/%N.log" + CELERYD_PID_FILE="/var/run/celery/%N.pid" + +To add an environment variable such as DJANGO_SETTINGS_MODULE use the +Environment in celery.service. + +.. _generic-initd-troubleshooting: + +Troubleshooting +--------------- + +If you can't get the init scripts to work, you should try running +them in *verbose mode*: + +.. code-block:: bash + + # sh -x /etc/init.d/celeryd start + +This can reveal hints as to why the service won't start. + +If the worker starts with "OK" but exits almost immediately afterwards +and there is nothing in the log file, then there is probably an error +but as the daemons standard outputs are already closed you'll +not be able to see them anywhere. For this situation you can use +the :envvar:`C_FAKEFORK` environment variable to skip the +daemonization step: + +.. code-block:: bash + + C_FAKEFORK=1 sh -x /etc/init.d/celeryd start + + +and now you should be able to see the errors. + +Commonly such errors are caused by insufficient permissions +to read from, or write to a file, and also by syntax errors +in configuration modules, user modules, 3rd party libraries, +or even from Celery itself (if you've found a bug, in which case +you should :ref:`report it `). + +.. _daemon-supervisord: + +`supervisord`_ +============== + +* `extra/supervisord/`_ + +.. _`extra/supervisord/`: + http://github.com/celery/celery/tree/3.1/extra/supervisord/ +.. _`supervisord`: http://supervisord.org/ + +.. _daemon-launchd: + +launchd (OS X) +============== + +* `extra/osx`_ + +.. _`extra/osx`: + http://github.com/celery/celery/tree/3.1/extra/osx/ + + +.. _daemon-windows: + +Windows +======= + +See this excellent external tutorial: + +http://www.calazan.com/windows-tip-run-applications-in-the-background-using-task-scheduler/ + +CentOS +====== +In CentOS we can take advantage of built-in service helpers, such as the +pid-based status checker function in ``/etc/init.d/functions``. +See the sample script in http://github.com/celery/celery/tree/3.1/extra/centos/. diff --git a/docs/tutorials/debugging.rst b/docs/tutorials/debugging.rst new file mode 100644 index 0000000..7eb8e5c --- /dev/null +++ b/docs/tutorials/debugging.rst @@ -0,0 +1,104 @@ +.. 
_tut-remote_debug: + +====================================== + Debugging Tasks Remotely (using pdb) +====================================== + +Basics +====== + +:mod:`celery.contrib.rdb` is an extended version of :mod:`pdb` that +enables remote debugging of processes that does not have terminal +access. + +Example usage: + +.. code-block:: python + + from celery import task + from celery.contrib import rdb + + @task() + def add(x, y): + result = x + y + rdb.set_trace() # <- set breakpoint + return result + + +:func:`~celery.contrib.rdb.set_trace` sets a breakpoint at the current +location and creates a socket you can telnet into to remotely debug +your task. + +The debugger may be started by multiple processes at the same time, +so rather than using a fixed port the debugger will search for an +available port, starting from the base port (6900 by default). +The base port can be changed using the environment variable +:envvar:`CELERY_RDB_PORT`. + +By default the debugger will only be available from the local host, +to enable access from the outside you have to set the environment +variable :envvar:`CELERY_RDB_HOST`. + +When the worker encounters your breakpoint it will log the following +information:: + + [INFO/MainProcess] Received task: + tasks.add[d7261c71-4962-47e5-b342-2448bedd20e8] + [WARNING/PoolWorker-1] Remote Debugger:6900: + Please telnet 127.0.0.1 6900. Type `exit` in session to continue. + [2011-01-18 14:25:44,119: WARNING/PoolWorker-1] Remote Debugger:6900: + Waiting for client... + +If you telnet the port specified you will be presented +with a `pdb` shell: + +.. code-block:: bash + + $ telnet localhost 6900 + Connected to localhost. + Escape character is '^]'. + > /opt/devel/demoapp/tasks.py(128)add() + -> return result + (Pdb) + +Enter ``help`` to get a list of available commands, +It may be a good idea to read the `Python Debugger Manual`_ if +you have never used `pdb` before. + +To demonstrate, we will read the value of the ``result`` variable, +change it and continue execution of the task:: + + (Pdb) result + 4 + (Pdb) result = 'hello from rdb' + (Pdb) continue + Connection closed by foreign host. + +The result of our vandalism can be seen in the worker logs:: + + [2011-01-18 14:35:36,599: INFO/MainProcess] Task + tasks.add[d7261c71-4962-47e5-b342-2448bedd20e8] succeeded + in 61.481s: 'hello from rdb' + +.. _`Python Debugger Manual`: http://docs.python.org/library/pdb.html + + +Tips +==== + +.. _breakpoint_signal: + +Enabling the breakpoint signal +------------------------------ + +If the environment variable :envvar:`CELERY_RDBSIG` is set, the worker +will open up an rdb instance whenever the `SIGUSR2` signal is sent. +This is the case for both main and worker processes. + +For example starting the worker with:: + + CELERY_RDBSIG=1 celery worker -l info + +You can start an rdb session for any of the worker processes by executing:: + + kill -USR2 diff --git a/docs/tutorials/index.rst b/docs/tutorials/index.rst new file mode 100644 index 0000000..5f52eea --- /dev/null +++ b/docs/tutorials/index.rst @@ -0,0 +1,13 @@ +=========== + Tutorials +=========== + +:Release: |version| +:Date: |today| + +.. toctree:: + :maxdepth: 2 + + daemonizing + debugging + task-cookbook diff --git a/docs/tutorials/task-cookbook.rst b/docs/tutorials/task-cookbook.rst new file mode 100644 index 0000000..ad772a7 --- /dev/null +++ b/docs/tutorials/task-cookbook.rst @@ -0,0 +1,63 @@ +.. _cookbook-tasks: + +================ + Task Cookbook +================ + +.. contents:: + :local: + +.. 
_cookbook-task-serial:
+
+Ensuring a task is only executed one at a time
+==============================================
+
+You can accomplish this by using a lock.
+
+In this example we'll be using the cache framework to set a lock that is
+accessible to all workers.
+
+It's part of an imaginary RSS feed importer called `djangofeeds`.
+The task takes a feed URL as a single argument, and imports that feed into
+a Django model called `Feed`. We ensure that it's not possible for two or
+more workers to import the same feed at the same time by setting a cache key
+consisting of the MD5 checksum of the feed URL.
+
+The cache key expires after some time in case something unexpected happens
+(you never know, right?)
+
+.. code-block:: python
+
+    from celery import task
+    from celery.utils.log import get_task_logger
+    from django.core.cache import cache
+    from django.utils.hashcompat import md5_constructor as md5
+    from djangofeeds.models import Feed
+
+    logger = get_task_logger(__name__)
+
+    LOCK_EXPIRE = 60 * 5  # Lock expires in 5 minutes
+
+    @task
+    def import_feed(feed_url):
+        # The cache key consists of the task name and the MD5 digest
+        # of the feed URL.
+        feed_url_digest = md5(feed_url).hexdigest()
+        lock_id = '{0}-lock-{1}'.format(import_feed.name, feed_url_digest)
+
+        # cache.add fails if the key already exists
+        acquire_lock = lambda: cache.add(lock_id, 'true', LOCK_EXPIRE)
+        # memcache delete is very slow, but we have to use it to take
+        # advantage of using add() for atomic locking
+        release_lock = lambda: cache.delete(lock_id)
+
+        logger.debug('Importing feed: %s', feed_url)
+        if acquire_lock():
+            try:
+                feed = Feed.objects.import_feed(feed_url)
+            finally:
+                release_lock()
+            return feed.url
+
+        logger.debug(
+            'Feed %s is already being imported by another worker', feed_url)
diff --git a/docs/userguide/application.rst b/docs/userguide/application.rst
new file mode 100644
index 0000000..4ebc142
--- /dev/null
+++ b/docs/userguide/application.rst
@@ -0,0 +1,534 @@
+.. _guide-app:
+
+=============
+ Application
+=============
+
+.. contents::
+    :local:
+    :depth: 1
+
+The Celery library must be instantiated before use; this instance
+is called an application (or *app* for short).
+
+The application is thread-safe so that multiple Celery applications
+with different configuration, components and tasks can co-exist in the
+same process space.
+
+Let's create one now:
+
+.. code-block:: python
+
+    >>> from celery import Celery
+    >>> app = Celery()
+    >>> app
+
+
+The last line shows the textual representation of the application,
+which includes the name of the celery class (``Celery``), the name of the
+current main module (``__main__``), and the memory address of the object
+(``0x100469fd0``).
+
+Main Name
+=========
+
+Only one of these is important, and that is the main module name;
+let's look at why that is.
+
+When you send a task message in Celery, that message will not contain
+any source code, but only the name of the task you want to execute.
+This works similarly to how host names work on the internet: every worker
+maintains a mapping of task names to their actual functions, called the *task
+registry*.
+
+Whenever you define a task, that task will also be added to the local registry:
+
+.. code-block:: python
+
+    >>> @app.task
+    ... def add(x, y):
+    ...
return x + y + + >>> add + <@task: __main__.add> + + >>> add.name + __main__.add + + >>> app.tasks['__main__.add'] + <@task: __main__.add> + +and there you see that ``__main__`` again; whenever Celery is not able +to detect what module the function belongs to, it uses the main module +name to generate the beginning of the task name. + +This is only a problem in a limited set of use cases: + + #. If the module that the task is defined in is run as a program. + #. If the application is created in the Python shell (REPL). + +For example here, where the tasks module is also used to start a worker: + +:file:`tasks.py`: + +.. code-block:: python + + from celery import Celery + app = Celery() + + @app.task + def add(x, y): return x + y + + if __name__ == '__main__': + app.worker_main() + +When this module is executed the tasks will be named starting with "``__main__``", +but when the module is imported by another process, say to call a task, +the tasks will be named starting with "``tasks``" (the real name of the module):: + + >>> from tasks import add + >>> add.name + tasks.add + +You can specify another name for the main module: + +.. code-block:: python + + >>> app = Celery('tasks') + >>> app.main + 'tasks' + + >>> @app.task + ... def add(x, y): + ... return x + y + + >>> add.name + tasks.add + +.. seealso:: :ref:`task-names` + +Configuration +============= + +There are several options you can set that will change how +Celery works. These options can be set directly on the app instance, +or you can use a dedicated configuration module. + +The configuration is available as :attr:`@Celery.conf`:: + + >>> app.conf.CELERY_TIMEZONE + 'Europe/London' + +where you can also set configuration values directly:: + + >>> app.conf.CELERY_ENABLE_UTC = True + +and update several keys at once by using the ``update`` method:: + + >>> app.conf.update( + ... CELERY_ENABLE_UTC=True, + ... CELERY_TIMEZONE='Europe/London', + ...) + +The configuration object consists of multiple dictionaries +that are consulted in order: + + #. Changes made at runtime. + #. The configuration module (if any) + #. The default configuration (:mod:`celery.app.defaults`). + +You can even add new default sources by using the :meth:`@Celery.add_defaults` +method. + +.. seealso:: + + Go to the :ref:`Configuration reference ` for a complete + listing of all the available settings, and their default values. + +``config_from_object`` +---------------------- + +The :meth:`@Celery.config_from_object` method loads configuration +from a configuration object. + +This can be a configuration module, or any object with configuration attributes. + +Note that any configuration that was previous set will be reset when +:meth:`~@Celery.config_from_object` is called. If you want to set additional +configuration you should do so after. + +Example 1: Using the name of a module +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + from celery import Celery + + app = Celery() + app.config_from_object('celeryconfig') + + +The ``celeryconfig`` module may then look like this: + +:file:`celeryconfig.py`: + +.. code-block:: python + + CELERY_ENABLE_UTC = True + CELERY_TIMEZONE = 'Europe/London' + +Example 2: Using a configuration module +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. tip:: + + Using the name of a module is recomended + as this means that the module doesn't need to be serialized + when the prefork pool is used. If you're + experiencing configuration pickle errors then please try using + the name of a module instead. + +.. 
code-block:: python + + from celery import Celery + + app = Celery() + import celeryconfig + app.config_from_object(celeryconfig) + +Example 3: Using a configuration class/object +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + from celery import Celery + + app = Celery() + + class Config: + CELERY_ENABLE_UTC = True + CELERY_TIMEZONE = 'Europe/London' + + app.config_from_object(Config) + # or using the fully qualified name of the object: + # app.config_from_object('module:Config') + +``config_from_envvar`` +---------------------- + +The :meth:`@Celery.config_from_envvar` takes the configuration module name +from an environment variable + +For example -- to load configuration from a module specified in the +environment variable named :envvar:`CELERY_CONFIG_MODULE`: + +.. code-block:: python + + import os + from celery import Celery + + #: Set default configuration module name + os.environ.setdefault('CELERY_CONFIG_MODULE', 'celeryconfig') + + app = Celery() + app.config_from_envvar('CELERY_CONFIG_MODULE') + +You can then specify the configuration module to use via the environment: + +.. code-block:: bash + + $ CELERY_CONFIG_MODULE="celeryconfig.prod" celery worker -l info + +.. _app-censored-config: + +Censored configuration +---------------------- + +If you ever want to print out the configuration, as debugging information +or similar, you may also want to filter out sensitive information like +passwords and API keys. + +Celery comes with several utilities used for presenting the configuration, +one is :meth:`~celery.app.utils.Settings.humanize`: + +.. code-block:: python + + >>> app.conf.humanize(with_defaults=False, censored=True) + +This method returns the configuration as a tabulated string. This will +only contain changes to the configuration by default, but you can include the +default keys and values by changing the ``with_defaults`` argument. + +If you instead want to work with the configuration as a dictionary, then you +can use the :meth:`~celery.app.utils.Settings.table` method: + +.. code-block:: python + + >>> app.conf.table(with_defaults=False, censored=True) + +Please note that Celery will not be able to remove all sensitive information, +as it merely uses a regular expression to search for commonly named keys. +If you add custom settings containing sensitive information you should name +the keys using a name that Celery identifies as secret. + +A configuration setting will be censored if the name contains any of +these substrings: + +``API``, ``TOKEN``, ``KEY``, ``SECRET``, ``PASS``, ``SIGNATURE``, ``DATABASE`` + +Laziness +======== + +The application instance is lazy, meaning that it will not be evaluated +until something is actually needed. + +Creating a :class:`@Celery` instance will only do the following: + + #. Create a logical clock instance, used for events. + #. Create the task registry. + #. Set itself as the current app (but not if the ``set_as_current`` + argument was disabled) + #. Call the :meth:`@Celery.on_init` callback (does nothing by default). + +The :meth:`~@Celery.task` decorator does not actually create the +tasks at the point when it's called, instead it will defer the creation +of the task to happen either when the task is used, or after the +application has been *finalized*, + +This example shows how the task is not created until +you use the task, or access an attribute (in this case :meth:`repr`): + +.. code-block:: python + + >>> @app.task + >>> def add(x, y): + ... 
return x + y + + >>> type(add) + + + >>> add.__evaluated__() + False + + >>> add # <-- causes repr(add) to happen + <@task: __main__.add> + + >>> add.__evaluated__() + True + +*Finalization* of the app happens either explicitly by calling +:meth:`@Celery.finalize` -- or implicitly by accessing the :attr:`~@Celery.tasks` +attribute. + +Finalizing the object will: + + #. Copy tasks that must be shared between apps + + Tasks are shared by default, but if the + ``shared`` argument to the task decorator is disabled, + then the task will be private to the app it's bound to. + + #. Evaluate all pending task decorators. + + #. Make sure all tasks are bound to the current app. + + Tasks are bound to apps so that it can read default + values from the configuration. + +.. _default-app: + +.. topic:: The "default app". + + Celery did not always work this way, it used to be that + there was only a module-based API, and for backwards compatibility + the old API is still there. + + Celery always creates a special app that is the "default app", + and this is used if no custom application has been instantiated. + + The :mod:`celery.task` module is there to accommodate the old API, + and should not be used if you use a custom app. You should + always use the methods on the app instance, not the module based API. + + For example, the old Task base class enables many compatibility + features where some may be incompatible with newer features, such + as task methods: + + .. code-block:: python + + from celery.task import Task # << OLD Task base class. + + from celery import Task # << NEW base class. + + The new base class is recommended even if you use the old + module-based API. + + +Breaking the chain +================== + +While it's possible to depend on the current app +being set, the best practice is to always pass the app instance +around to anything that needs it. + +I call this the "app chain", since it creates a chain +of instances depending on the app being passed. + +The following example is considered bad practice: + +.. code-block:: python + + from celery import current_app + + class Scheduler(object): + + def run(self): + app = current_app + +Instead it should take the ``app`` as an argument: + +.. code-block:: python + + class Scheduler(object): + + def __init__(self, app): + self.app = app + +Internally Celery uses the :func:`celery.app.app_or_default` function +so that everything also works in the module-based compatibility API + +.. code-block:: python + + from celery.app import app_or_default + + class Scheduler(object): + def __init__(self, app=None): + self.app = app_or_default(app) + +In development you can set the :envvar:`CELERY_TRACE_APP` +environment variable to raise an exception if the app +chain breaks: + +.. code-block:: bash + + $ CELERY_TRACE_APP=1 celery worker -l info + + +.. topic:: Evolving the API + + Celery has changed a lot in the 3 years since it was initially + created. + + For example, in the beginning it was possible to use any callable as + a task: + + .. code-block:: python + + def hello(to): + return 'hello {0}'.format(to) + + >>> from celery.execute import apply_async + + >>> apply_async(hello, ('world!', )) + + or you could also create a ``Task`` class to set + certain options, or override other behavior + + .. 
code-block:: python
+
+        from celery.task import Task
+        from celery.registry import tasks
+
+        class Hello(Task):
+            send_error_emails = True
+
+            def run(self, to):
+                return 'hello {0}'.format(to)
+        tasks.register(Hello)
+
+        >>> Hello.delay('world!')
+
+    Later, it was decided that passing arbitrary callables
+    was an anti-pattern, since it makes it very hard to use
+    serializers other than pickle, and the feature was removed
+    in 2.0, replaced by task decorators:
+
+    .. code-block:: python
+
+        from celery.task import task
+
+        @task(send_error_emails=True)
+        def hello(to):
+            return 'hello {0}'.format(to)
+
+Abstract Tasks
+==============
+
+All tasks created using the :meth:`~@Celery.task` decorator
+will inherit from the application's base :attr:`~@Celery.Task` class.
+
+You can specify a different base class with the ``base`` argument:
+
+.. code-block:: python
+
+    @app.task(base=OtherTask)
+    def add(x, y):
+        return x + y
+
+To create a custom task class you should inherit from the neutral base
+class: :class:`celery.Task`.
+
+.. code-block:: python
+
+    from celery import Task
+
+    class DebugTask(Task):
+        abstract = True
+
+        def __call__(self, *args, **kwargs):
+            print('TASK STARTING: {0.name}[{0.request.id}]'.format(self))
+            return super(DebugTask, self).__call__(*args, **kwargs)
+
+
+.. tip::
+
+    If you override the task's ``__call__`` method, then it's very important
+    that you also call super so that the base call method can set up the
+    default request used when a task is called directly.
+
+The neutral base class is special because it's not bound to any specific app
+yet. Concrete subclasses of this class will be bound, so you should
+always mark generic base classes as ``abstract``.
+
+Once a task is bound to an app it will read configuration to set default values
+and so on.
+
+It's also possible to change the default base class for an application
+by changing its :meth:`@Celery.Task` attribute:
+
+.. code-block:: python
+
+    >>> from celery import Celery, Task
+
+    >>> app = Celery()
+
+    >>> class MyBaseTask(Task):
+    ...     abstract = True
+    ...     send_error_emails = True
+
+    >>> app.Task = MyBaseTask
+    >>> app.Task
+
+
+    >>> @app.task
+    ... def add(x, y):
+    ...     return x + y
+
+    >>> add
+    <@task: __main__.add>
+
+    >>> add.__class__.mro()
+    [>,
+     ,
+     ,
+     ]
diff --git a/docs/userguide/calling.rst b/docs/userguide/calling.rst
new file mode 100644
index 0000000..0931660
--- /dev/null
+++ b/docs/userguide/calling.rst
@@ -0,0 +1,495 @@
+.. _guide-calling:
+
+===============
+ Calling Tasks
+===============
+
+.. contents::
+    :local:
+    :depth: 1
+
+
+.. _calling-basics:
+
+Basics
+======
+
+This document describes Celery's uniform "Calling API"
+used by task instances and the :ref:`canvas `.
+
+The API defines a standard set of execution options, as well as three methods:
+
+    - ``apply_async(args[, kwargs[, …]])``
+
+        Sends a task message.
+
+    - ``delay(*args, **kwargs)``
+
+        Shortcut to send a task message, but does not support execution
+        options.
+
+    - *calling* (``__call__``)
+
+        Applying an object supporting the calling API (e.g. ``add(2, 2)``)
+        means that the task will be executed in the current process, and
+        not by a worker (a message will not be sent).
+
+.. _calling-cheat:
+
+.. topic:: Quick Cheat Sheet
+
+    - ``T.delay(arg, kwarg=value)``
+      always a shortcut to ``.apply_async``.
+
+    - ``T.apply_async((arg, ), {'kwarg': value})``
+
+    - ``T.apply_async(countdown=10)``
+      executes 10 seconds from now.
+ + - ``T.apply_async(eta=now + timedelta(seconds=10))`` + executes 10 seconds from now, specifed using ``eta`` + + - ``T.apply_async(countdown=60, expires=120)`` + executes in one minute from now, but expires after 2 minutes. + + - ``T.apply_async(expires=now + timedelta(days=2))`` + expires in 2 days, set using :class:`~datetime.datetime`. + + +Example +------- + +The :meth:`~@Task.delay` method is convenient as it looks like calling a regular +function: + +.. code-block:: python + + task.delay(arg1, arg2, kwarg1='x', kwarg2='y') + +Using :meth:`~@Task.apply_async` instead you have to write: + +.. code-block:: python + + task.apply_async(args=[arg1, arg2], kwargs={'kwarg1': 'x', 'kwarg2': 'y'}) + +.. sidebar:: Tip + + If the task is not registered in the current process + you can use :meth:`~@send_task` to call the task by name instead. + + +So `delay` is clearly convenient, but if you want to set additional execution +options you have to use ``apply_async``. + +The rest of this document will go into the task execution +options in detail. All examples use a task +called `add`, returning the sum of two arguments: + +.. code-block:: python + + @app.task + def add(x, y): + return x + y + + +.. topic:: There's another way… + + You will learn more about this later while reading about the :ref:`Canvas + `, but :class:`~celery.subtask`'s are objects used to pass around + the signature of a task invocation, (for example to send it over the + network), and they also support the Calling API: + + .. code-block:: python + + task.s(arg1, arg2, kwarg1='x', kwargs2='y').apply_async() + +.. _calling-links: + +Linking (callbacks/errbacks) +============================ + +Celery supports linking tasks together so that one task follows another. +The callback task will be applied with the result of the parent task +as a partial argument: + +.. code-block:: python + + add.apply_async((2, 2), link=add.s(16)) + +.. sidebar:: What is ``s``? + + The ``add.s`` call used here is called a subtask, I talk + more about subtasks in the :ref:`canvas guide `, + where you can also learn about :class:`~celery.chain`, which + is a simpler way to chain tasks together. + + In practice the ``link`` execution option is considered an internal + primitive, and you will probably not use it directly, but + rather use chains instead. + +Here the result of the first task (4) will be sent to a new +task that adds 16 to the previous result, forming the expression +:math:`(2 + 2) + 16 = 20` + + +You can also cause a callback to be applied if task raises an exception +(*errback*), but this behaves differently from a regular callback +in that it will be passed the id of the parent task, not the result. +This is because it may not always be possible to serialize +the exception raised, and so this way the error callback requires +a result backend to be enabled, and the task must retrieve the result +of the task instead. + +This is an example error callback: + +.. code-block:: python + + @app.task + def error_handler(uuid): + result = AsyncResult(uuid) + exc = result.get(propagate=False) + print('Task {0} raised exception: {1!r}\n{2!r}'.format( + uuid, exc, result.traceback)) + +it can be added to the task using the ``link_error`` execution +option: + +.. 
code-block:: python + + add.apply_async((2, 2), link_error=error_handler.s()) + + +In addition, both the ``link`` and ``link_error`` options can be expressed +as a list:: + + add.apply_async((2, 2), link=[add.s(16), other_task.s()]) + +The callbacks/errbacks will then be called in order, and all +callbacks will be called with the return value of the parent task +as a partial argument. + +.. _calling-eta: + +ETA and countdown +================= + +The ETA (estimated time of arrival) lets you set a specific date and time that +is the earliest time at which your task will be executed. `countdown` is +a shortcut to set eta by seconds into the future. + +.. code-block:: python + + >>> result = add.apply_async((2, 2), countdown=3) + >>> result.get() # this takes at least 3 seconds to return + 20 + +The task is guaranteed to be executed at some time *after* the +specified date and time, but not necessarily at that exact time. +Possible reasons for broken deadlines may include many items waiting +in the queue, or heavy network latency. To make sure your tasks +are executed in a timely manner you should monitor the queue for congestion. Use +Munin, or similar tools, to receive alerts, so appropriate action can be +taken to ease the workload. See :ref:`monitoring-munin`. + +While `countdown` is an integer, `eta` must be a :class:`~datetime.datetime` +object, specifying an exact date and time (including millisecond precision, +and timezone information): + +.. code-block:: python + + >>> from datetime import datetime, timedelta + + >>> tomorrow = datetime.utcnow() + timedelta(days=1) + >>> add.apply_async((2, 2), eta=tomorrow) + +.. _calling-expiration: + +Expiration +========== + +The `expires` argument defines an optional expiry time, +either as seconds after task publish, or a specific date and time using +:class:`~datetime.datetime`: + +.. code-block:: python + + >>> # Task expires after one minute from now. + >>> add.apply_async((10, 10), expires=60) + + >>> # Also supports datetime + >>> from datetime import datetime, timedelta + >>> add.apply_async((10, 10), kwargs, + ... expires=datetime.now() + timedelta(days=1) + + +When a worker receives an expired task it will mark +the task as :state:`REVOKED` (:exc:`~@TaskRevokedError`). + +.. _calling-retry: + +Message Sending Retry +===================== + +Celery will automatically retry sending messages in the event of connection +failure, and retry behavior can be configured -- like how often to retry, or a maximum +number of retries -- or disabled all together. + +To disable retry you can set the ``retry`` execution option to :const:`False`: + +.. code-block:: python + + add.apply_async((2, 2), retry=False) + +.. topic:: Related Settings + + .. hlist:: + :columns: 2 + + - :setting:`CELERY_TASK_PUBLISH_RETRY` + - :setting:`CELERY_TASK_PUBLISH_RETRY_POLICY` + +Retry Policy +------------ + +A retry policy is a mapping that controls how retries behave, +and can contain the following keys: + +- `max_retries` + + Maximum number of retries before giving up, in this case the + exception that caused the retry to fail will be raised. + + A value of 0 or :const:`None` means it will retry forever. + + The default is to retry 3 times. + +- `interval_start` + + Defines the number of seconds (float or integer) to wait between + retries. Default is 0, which means the first retry will be + instantaneous. + +- `interval_step` + + On each consecutive retry this number will be added to the retry + delay (float or integer). Default is 0.2. 
+ +- `interval_max` + + Maximum number of seconds (float or integer) to wait between + retries. Default is 0.2. + +For example, the default policy correlates to: + +.. code-block:: python + + add.apply_async((2, 2), retry=True, retry_policy={ + 'max_retries': 3, + 'interval_start': 0, + 'interval_step': 0.2, + 'interval_max': 0.2, + }) + +the maximum time spent retrying will be 0.4 seconds. It is set relatively +short by default because a connection failure could lead to a retry pile effect +if the broker connection is down: e.g. many web server processes waiting +to retry blocking other incoming requests. + +.. _calling-serializers: + +Serializers +=========== + +.. sidebar:: Security + + The pickle module allows for execution of arbitrary functions, + please see the :ref:`security guide `. + + Celery also comes with a special serializer that uses + cryptography to sign your messages. + +Data transferred between clients and workers needs to be serialized, +so every message in Celery has a ``content_type`` header that +describes the serialization method used to encode it. + +The default serializer is :mod:`pickle`, but you can +change this using the :setting:`CELERY_TASK_SERIALIZER` setting, +or for each individual task, or even per message. + +There's built-in support for :mod:`pickle`, `JSON`, `YAML` +and `msgpack`, and you can also add your own custom serializers by registering +them into the Kombu serializer registry (see ref:`kombu:guide-serialization`). + +Each option has its advantages and disadvantages. + +json -- JSON is supported in many programming languages, is now + a standard part of Python (since 2.6), and is fairly fast to decode + using the modern Python libraries such as :mod:`cjson` or :mod:`simplejson`. + + The primary disadvantage to JSON is that it limits you to the following + data types: strings, Unicode, floats, boolean, dictionaries, and lists. + Decimals and dates are notably missing. + + Also, binary data will be transferred using Base64 encoding, which will + cause the transferred data to be around 34% larger than an encoding which + supports native binary types. + + However, if your data fits inside the above constraints and you need + cross-language support, the default setting of JSON is probably your + best choice. + + See http://json.org for more information. + +pickle -- If you have no desire to support any language other than + Python, then using the pickle encoding will gain you the support of + all built-in Python data types (except class instances), smaller + messages when sending binary files, and a slight speedup over JSON + processing. + + See http://docs.python.org/library/pickle.html for more information. + +yaml -- YAML has many of the same characteristics as json, + except that it natively supports more data types (including dates, + recursive references, etc.) + + However, the Python libraries for YAML are a good bit slower than the + libraries for JSON. + + If you need a more expressive set of data types and need to maintain + cross-language compatibility, then YAML may be a better fit than the above. + + See http://yaml.org/ for more information. + +msgpack -- msgpack is a binary serialization format that is closer to JSON + in features. It is very young however, and support should be considered + experimental at this point. + + See http://msgpack.org/ for more information. + +The encoding used is available as a message header, so the worker knows how to +deserialize any task. 
If you use a custom serializer, this serializer must +be available for the worker. + +The following order is used to decide which serializer +to use when sending a task: + + 1. The `serializer` execution option. + 2. The :attr:`@-Task.serializer` attribute + 3. The :setting:`CELERY_TASK_SERIALIZER` setting. + + +Example setting a custom serializer for a single task invocation: + +.. code-block:: python + + >>> add.apply_async((10, 10), serializer='json') + +.. _calling-compression: + +Compression +=========== + +Celery can compress the messages using either *gzip*, or *bzip2*. +You can also create your own compression schemes and register +them in the :func:`kombu compression registry `. + +The following order is used to decide which compression scheme +to use when sending a task: + + 1. The `compression` execution option. + 2. The :attr:`@-Task.compression` attribute. + 3. The :setting:`CELERY_MESSAGE_COMPRESSION` attribute. + +Example specifying the compression used when calling a task:: + + >>> add.apply_async((2, 2), compression='zlib') + +.. _calling-connections: + +Connections +=========== + +.. sidebar:: Automatic Pool Support + + Since version 2.3 there is support for automatic connection pools, + so you don't have to manually handle connections and publishers + to reuse connections. + + The connection pool is enabled by default since version 2.5. + + See the :setting:`BROKER_POOL_LIMIT` setting for more information. + +You can handle the connection manually by creating a +publisher: + +.. code-block:: python + + + results = [] + with add.app.pool.acquire(block=True) as connection: + with add.get_publisher(connection) as publisher: + try: + for args in numbers: + res = add.apply_async((2, 2), publisher=publisher) + results.append(res) + print([res.get() for res in results]) + + +Though this particular example is much better expressed as a group: + +.. code-block:: python + + >>> from celery import group + + >>> numbers = [(2, 2), (4, 4), (8, 8), (16, 16)] + >>> res = group(add.subtask(n) for i in numbers).apply_async() + + >>> res.get() + [4, 8, 16, 32] + +.. _calling-routing: + +Routing options +=============== + +Celery can route tasks to different queues. + +Simple routing (name <-> name) is accomplished using the ``queue`` option:: + + add.apply_async(queue='priority.high') + +You can then assign workers to the ``priority.high`` queue by using +the workers :option:`-Q` argument: + +.. code-block:: bash + + $ celery -A proj worker -l info -Q celery,priority.high + +.. seealso:: + + Hard-coding queue names in code is not recommended, the best practice + is to use configuration routers (:setting:`CELERY_ROUTES`). + + To find out more about routing, please see :ref:`guide-routing`. + +Advanced Options +---------------- + +These options are for advanced users who want to take use of +AMQP's full routing capabilities. Interested parties may read the +:ref:`routing guide `. + +- exchange + + Name of exchange (or a :class:`kombu.entity.Exchange`) to + send the message to. + +- routing_key + + Routing key used to determine. + +- priority + + A number between `0` and `9`, where `0` is the highest priority. + + Supported by: redis, beanstalk diff --git a/docs/userguide/canvas.rst b/docs/userguide/canvas.rst new file mode 100644 index 0000000..0afff4d --- /dev/null +++ b/docs/userguide/canvas.rst @@ -0,0 +1,938 @@ +.. _guide-canvas: + +============================= + Canvas: Designing Workflows +============================= + +.. contents:: + :local: + :depth: 2 + +.. 
_canvas-subtasks: + +.. _canvas-signatures: + +Signatures +========== + +.. versionadded:: 2.0 + +You just learned how to call a task using the tasks ``delay`` method +in the :ref:`calling ` guide, and this is often all you need, +but sometimes you may want to pass the signature of a task invocation to +another process or as an argument to another function. + +A :func:`~celery.signature` wraps the arguments, keyword arguments, and execution options +of a single task invocation in a way such that it can be passed to functions +or even serialized and sent across the wire. + +Signatures are often nicknamed "subtasks" because they describe a task to be called +within a task. + +- You can create a signature for the ``add`` task using its name like this:: + + >>> from celery import signature + >>> signature('tasks.add', args=(2, 2), countdown=10) + tasks.add(2, 2) + + This task has a signature of arity 2 (two arguments): ``(2, 2)``, + and sets the countdown execution option to 10. + +- or you can create one using the task's ``subtask`` method:: + + >>> add.subtask((2, 2), countdown=10) + tasks.add(2, 2) + +- There is also a shortcut using star arguments:: + + >>> add.s(2, 2) + tasks.add(2, 2) + +- Keyword arguments are also supported:: + + >>> add.s(2, 2, debug=True) + tasks.add(2, 2, debug=True) + +- From any signature instance you can inspect the different fields:: + + >>> s = add.subtask((2, 2), {'debug': True}, countdown=10) + >>> s.args + (2, 2) + >>> s.kwargs + {'debug': True} + >>> s.options + {'countdown': 10} + +- It supports the "Calling API" which means it supports ``delay`` and + ``apply_async`` or being called directly. + + Calling the signature will execute the task inline in the current process:: + + >>> add(2, 2) + 4 + >>> add.s(2, 2)() + 4 + + ``delay`` is our beloved shortcut to ``apply_async`` taking star-arguments:: + + >>> result = add.delay(2, 2) + >>> result.get() + 4 + + ``apply_async`` takes the same arguments as the :meth:`Task.apply_async <@Task.apply_async>` method:: + + >>> add.apply_async(args, kwargs, **options) + >>> add.subtask(args, kwargs, **options).apply_async() + + >>> add.apply_async((2, 2), countdown=1) + >>> add.subtask((2, 2), countdown=1).apply_async() + +- You can't define options with :meth:`~@Task.s`, but a chaining + ``set`` call takes care of that:: + + >>> add.s(2, 2).set(countdown=1) + proj.tasks.add(2, 2) + +Partials +-------- + +With a signature, you can execute the task in a worker:: + + >>> add.s(2, 2).delay() + >>> add.s(2, 2).apply_async(countdown=1) + +Or you can call it directly in the current process:: + + >>> add.s(2, 2)() + 4 + +Specifying additional args, kwargs or options to ``apply_async``/``delay`` +creates partials: + +- Any arguments added will be prepended to the args in the signature:: + + >>> partial = add.s(2) # incomplete signature + >>> partial.delay(4) # 2 + 4 + >>> partial.apply_async((4, )) # same + +- Any keyword arguments added will be merged with the kwargs in the signature, + with the new keyword arguments taking precedence:: + + >>> s = add.s(2, 2) + >>> s.delay(debug=True) # -> add(2, 2, debug=True) + >>> s.apply_async(kwargs={'debug': True}) # same + +- Any options added will be merged with the options in the signature, + with the new options taking precedence:: + + >>> s = add.subtask((2, 2), countdown=10) + >>> s.apply_async(countdown=1) # countdown is now 1 + +You can also clone signatures to create derivates: + + >>> s = add.s(2) + proj.tasks.add(2) + + >>> s.clone(args=(4, ), kwargs={'debug': True}) + 
proj.tasks.add(2, 4, debug=True) + +Immutability +------------ + +.. versionadded:: 3.0 + +Partials are meant to be used with callbacks, any tasks linked or chord +callbacks will be applied with the result of the parent task. +Sometimes you want to specify a callback that does not take +additional arguments, and in that case you can set the signature +to be immutable:: + + >>> add.apply_async((2, 2), link=reset_buffers.subtask(immutable=True)) + +The ``.si()`` shortcut can also be used to create immutable signatures:: + + >>> add.apply_async((2, 2), link=reset_buffers.si()) + +Only the execution options can be set when a signature is immutable, +so it's not possible to call the signature with partial args/kwargs. + +.. note:: + + In this tutorial I sometimes use the prefix operator `~` to signatures. + You probably shouldn't use it in your production code, but it's a handy shortcut + when experimenting in the Python shell:: + + >>> ~sig + + >>> # is the same as + >>> sig.delay().get() + + +.. _canvas-callbacks: + +Callbacks +--------- + +.. versionadded:: 3.0 + +Callbacks can be added to any task using the ``link`` argument +to ``apply_async``:: + + add.apply_async((2, 2), link=other_task.s()) + +The callback will only be applied if the task exited successfully, +and it will be applied with the return value of the parent task as argument. + +As I mentioned earlier, any arguments you add to a signature, +will be prepended to the arguments specified by the signature itself! + +If you have the signature:: + + >>> sig = add.s(10) + +then `sig.delay(result)` becomes:: + + >>> add.apply_async(args=(result, 10)) + +... + +Now let's call our ``add`` task with a callback using partial +arguments:: + + >>> add.apply_async((2, 2), link=add.s(8)) + +As expected this will first launch one task calculating :math:`2 + 2`, then +another task calculating :math:`4 + 8`. + +The Primitives +============== + +.. versionadded:: 3.0 + +.. topic:: Overview + + - ``group`` + + The group primitive is a signature that takes a list of tasks that should + be applied in parallel. + + - ``chain`` + + The chain primitive lets us link together signatures so that one is called + after the other, essentially forming a *chain* of callbacks. + + - ``chord`` + + A chord is just like a group but with a callback. A chord consists + of a header group and a body, where the body is a task that should execute + after all of the tasks in the header are complete. + + - ``map`` + + The map primitive works like the built-in ``map`` function, but creates + a temporary task where a list of arguments is applied to the task. + E.g. ``task.map([1, 2])`` results in a single task + being called, applying the arguments in order to the task function so + that the result is:: + + res = [task(1), task(2)] + + - ``starmap`` + + Works exactly like map except the arguments are applied as ``*args``. + For example ``add.starmap([(2, 2), (4, 4)])`` results in a single + task calling:: + + res = [add(2, 2), add(4, 4)] + + - ``chunks`` + + Chunking splits a long list of arguments into parts, e.g the operation:: + + >>> items = zip(xrange(1000), xrange(1000)) # 1000 items + >>> add.chunks(items, 10) + + will split the list of items into chunks of 10, resulting in 100 + tasks (each processing 10 items in sequence). + + +The primitives are also signature objects themselves, so that they can be combined +in any number of ways to compose complex workflows. 
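+
+The ``map``, ``starmap`` and ``chunks`` primitives from the overview above
+are not shown in the examples that follow, so here is a minimal sketch of how
+they can be called. It assumes the same ``add`` task from ``proj.tasks`` used
+elsewhere in this guide, and a running worker with a result backend:
+
+.. code-block:: python
+
+    >>> from proj.tasks import add
+
+    >>> # starmap: a single message is sent; the worker applies add(*args)
+    >>> # to every item and returns all results as one list.
+    >>> add.starmap([(2, 2), (4, 4)]).apply_async().get()
+    [4, 8]
+
+    >>> # chunks: split 100 argument pairs into 10 tasks of 10 items each.
+    >>> res = add.chunks(zip(range(100), range(100)), 10).apply_async()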
+ +Here's some examples: + +- Simple chain + + Here's a simple chain, the first task executes passing its return value + to the next task in the chain, and so on. + + .. code-block:: python + + >>> from celery import chain + + # 2 + 2 + 4 + 8 + >>> res = chain(add.s(2, 2), add.s(4), add.s(8))() + >>> res.get() + 16 + + This can also be written using pipes:: + + >>> (add.s(2, 2) | add.s(4) | add.s(8))().get() + 16 + +- Immutable signatures + + Signatures can be partial so arguments can be + added to the existing arguments, but you may not always want that, + for example if you don't want the result of the previous task in a chain. + + In that case you can mark the signature as immutable, so that the arguments + cannot be changed:: + + >>> add.subtask((2, 2), immutable=True) + + There's also an ``.si`` shortcut for this:: + + >>> add.si(2, 2) + + Now you can create a chain of independent tasks instead:: + + >>> res = (add.si(2, 2) | add.si(4, 4) | add.s(8, 8))() + >>> res.get() + 16 + + >>> res.parent.get() + 8 + + >>> res.parent.parent.get() + 4 + +- Simple group + + You can easily create a group of tasks to execute in parallel:: + + >>> from celery import group + >>> res = group(add.s(i, i) for i in xrange(10))() + >>> res.get(timeout=1) + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] + +- Simple chord + + The chord primitive enables us to add callback to be called when + all of the tasks in a group have finished executing, which is often + required for algorithms that aren't embarrassingly parallel:: + + >>> from celery import chord + >>> res = chord((add.s(i, i) for i in xrange(10)), xsum.s())() + >>> res.get() + 90 + + The above example creates 10 task that all start in parallel, + and when all of them are complete the return values are combined + into a list and sent to the ``xsum`` task. + + The body of a chord can also be immutable, so that the return value + of the group is not passed on to the callback:: + + >>> chord((import_contact.s(c) for c in contacts), + ... notify_complete.si(import_id)).apply_async() + + Note the use of ``.si`` above which creates an immutable signature. + +- Blow your mind by combining + + Chains can be partial too:: + + >>> c1 = (add.s(4) | mul.s(8)) + + # (16 + 4) * 8 + >>> res = c1(16) + >>> res.get() + 160 + + Which means that you can combine chains:: + + # ((4 + 16) * 2 + 4) * 8 + >>> c2 = (add.s(4, 16) | mul.s(2) | (add.s(4) | mul.s(8))) + + >>> res = c2() + >>> res.get() + 352 + + Chaining a group together with another task will automatically + upgrade it to be a chord:: + + >>> c3 = (group(add.s(i, i) for i in xrange(10)) | xsum.s()) + >>> res = c3() + >>> res.get() + 90 + + Groups and chords accepts partial arguments too, so in a chain + the return value of the previous task is forwarded to all tasks in the group:: + + + >>> new_user_workflow = (create_user.s() | group( + ... import_contacts.s(), + ... send_welcome_email.s())) + ... new_user_workflow.delay(username='artv', + ... first='Art', + ... last='Vandelay', + ... email='art@vandelay.com') + + + If you don't want to forward arguments to the group then + you can make the signatures in the group immutable:: + + >>> res = (add.s(4, 4) | group(add.si(i, i) for i in xrange(10)))() + >>> res.get() + + + >>> res.parent.get() + 8 + + +.. _canvas-chain: + +Chains +------ + +.. 
versionadded:: 3.0 + +Tasks can be linked together, which in practice means adding +a callback task:: + + >>> res = add.apply_async((2, 2), link=mul.s(16)) + >>> res.get() + 4 + +The linked task will be applied with the result of its parent +task as the first argument, which in the above case will result +in ``mul(4, 16)`` since the result is 4. + +The results will keep track of what subtasks a task applies, +and this can be accessed from the result instance:: + + >>> res.children + [] + + >>> res.children[0].get() + 64 + +The result instance also has a :meth:`~@AsyncResult.collect` method +that treats the result as a graph, enabling you to iterate over +the results:: + + >>> list(res.collect()) + [(, 4), + (, 64)] + +By default :meth:`~@AsyncResult.collect` will raise an +:exc:`~@IncompleteStream` exception if the graph is not fully +formed (one of the tasks has not completed yet), +but you can get an intermediate representation of the graph +too:: + + >>> for result, value in res.collect(intermediate=True)): + .... + +You can link together as many tasks as you like, +and signatures can be linked too:: + + >>> s = add.s(2, 2) + >>> s.link(mul.s(4)) + >>> s.link(log_result.s()) + +You can also add *error callbacks* using the ``link_error`` argument:: + + >>> add.apply_async((2, 2), link_error=log_error.s()) + + >>> add.subtask((2, 2), link_error=log_error.s()) + +Since exceptions can only be serialized when pickle is used +the error callbacks take the id of the parent task as argument instead: + +.. code-block:: python + + from __future__ import print_function + import os + from proj.celery import app + + @app.task + def log_error(task_id): + result = app.AsyncResult(task_id) + result.get(propagate=False) # make sure result written. + with open(os.path.join('/var/errors', task_id), 'a') as fh: + print('--\n\n{0} {1} {2}'.format( + task_id, result.result, result.traceback), file=fh) + +To make it even easier to link tasks together there is +a special signature called :class:`~celery.chain` that lets +you chain tasks together: + +.. code-block:: python + + >>> from celery import chain + >>> from proj.tasks import add, mul + + # (4 + 4) * 8 * 10 + >>> res = chain(add.s(4, 4), mul.s(8), mul.s(10)) + proj.tasks.add(4, 4) | proj.tasks.mul(8) | proj.tasks.mul(10) + + +Calling the chain will call the tasks in the current process +and return the result of the last task in the chain:: + + >>> res = chain(add.s(4, 4), mul.s(8), mul.s(10))() + >>> res.get() + 640 + +It also sets ``parent`` attributes so that you can +work your way up the chain to get intermediate results:: + + >>> res.parent.get() + 64 + + >>> res.parent.parent.get() + 8 + + >>> res.parent.parent + + + +Chains can also be made using the ``|`` (pipe) operator:: + + >>> (add.s(2, 2) | mul.s(8) | mul.s(10)).apply_async() + +Graphs +~~~~~~ + +In addition you can work with the result graph as a +:class:`~celery.datastructures.DependencyGraph`: + +.. code-block:: python + + >>> res = chain(add.s(4, 4), mul.s(8), mul.s(10))() + + >>> res.parent.parent.graph + 285fa253-fcf8-42ef-8b95-0078897e83e6(1) + 463afec2-5ed4-4036-b22d-ba067ec64f52(0) + 872c3995-6fa0-46ca-98c2-5a19155afcf0(2) + 285fa253-fcf8-42ef-8b95-0078897e83e6(1) + 463afec2-5ed4-4036-b22d-ba067ec64f52(0) + +You can even convert these graphs to *dot* format:: + + >>> with open('graph.dot', 'w') as fh: + ... res.parent.parent.graph.to_dot(fh) + + +and create images: + +.. code-block:: bash + + $ dot -Tpng graph.dot -o graph.png + +.. image:: ../images/result_graph.png + +.. 
_canvas-group: + +Groups +------ + +.. versionadded:: 3.0 + +A group can be used to execute several tasks in parallel. + +The :class:`~celery.group` function takes a list of signatures:: + + >>> from celery import group + >>> from proj.tasks import add + + >>> group(add.s(2, 2), add.s(4, 4)) + (proj.tasks.add(2, 2), proj.tasks.add(4, 4)) + +If you **call** the group, the tasks will be applied +one after one in the current process, and a :class:`~celery.result.GroupResult` +instance is returned which can be used to keep track of the results, +or tell how many tasks are ready and so on:: + + >>> g = group(add.s(2, 2), add.s(4, 4)) + >>> res = g() + >>> res.get() + [4, 8] + +Group also supports iterators:: + + >>> group(add.s(i, i) for i in xrange(100))() + +A group is a signature object, so it can be used in combination +with other signatures. + +Group Results +~~~~~~~~~~~~~ + +The group task returns a special result too, +this result works just like normal task results, except +that it works on the group as a whole:: + + >>> from celery import group + >>> from tasks import add + + >>> job = group([ + ... add.s(2, 2), + ... add.s(4, 4), + ... add.s(8, 8), + ... add.s(16, 16), + ... add.s(32, 32), + ... ]) + + >>> result = job.apply_async() + + >>> result.ready() # have all subtasks completed? + True + >>> result.successful() # were all subtasks successful? + True + >>> result.get() + [4, 8, 16, 32, 64] + +The :class:`~celery.result.GroupResult` takes a list of +:class:`~celery.result.AsyncResult` instances and operates on them as +if it was a single task. + +It supports the following operations: + +* :meth:`~celery.result.GroupResult.successful` + + Return :const:`True` if all of the subtasks finished + successfully (e.g. did not raise an exception). + +* :meth:`~celery.result.GroupResult.failed` + + Return :const:`True` if any of the subtasks failed. + +* :meth:`~celery.result.GroupResult.waiting` + + Return :const:`True` if any of the subtasks + is not ready yet. + +* :meth:`~celery.result.GroupResult.ready` + + Return :const:`True` if all of the subtasks + are ready. + +* :meth:`~celery.result.GroupResult.completed_count` + + Return the number of completed subtasks. + +* :meth:`~celery.result.GroupResult.revoke` + + Revoke all of the subtasks. + +* :meth:`~celery.result.GroupResult.join` + + Gather the results for all of the subtasks + and return a list with them ordered by the order of which they + were called. + +.. _canvas-chord: + +Chords +------ + +.. versionadded:: 2.3 + +.. note:: + + Tasks used within a chord must *not* ignore their results. If the result + backend is disabled for *any* task (header or body) in your chord you + should read ":ref:`chord-important-notes`". + + +A chord is a task that only executes after all of the tasks in a group have +finished executing. + + +Let's calculate the sum of the expression +:math:`1 + 1 + 2 + 2 + 3 + 3 ... n + n` up to a hundred digits. + +First you need two tasks, :func:`add` and :func:`tsum` (:func:`sum` is +already a standard function): + +.. code-block:: python + + @app.task + def add(x, y): + return x + y + + @app.task + def tsum(numbers): + return sum(numbers) + + +Now you can use a chord to calculate each addition step in parallel, and then +get the sum of the resulting numbers:: + + >>> from celery import chord + >>> from tasks import add, tsum + + >>> chord(add.s(i, i) + ... 
for i in xrange(100))(tsum.s()).get() + 9900 + + +This is obviously a very contrived example, the overhead of messaging and +synchronization makes this a lot slower than its Python counterpart:: + + sum(i + i for i in xrange(100)) + +The synchronization step is costly, so you should avoid using chords as much +as possible. Still, the chord is a powerful primitive to have in your toolbox +as synchronization is a required step for many parallel algorithms. + +Let's break the chord expression down: + +.. code-block:: python + + >>> callback = tsum.s() + >>> header = [add.s(i, i) for i in range(100)] + >>> result = chord(header)(callback) + >>> result.get() + 9900 + +Remember, the callback can only be executed after all of the tasks in the +header have returned. Each step in the header is executed as a task, in +parallel, possibly on different nodes. The callback is then applied with +the return value of each task in the header. The task id returned by +:meth:`chord` is the id of the callback, so you can wait for it to complete +and get the final return value (but remember to :ref:`never have a task wait +for other tasks `) + +.. _chord-errors: + +Error handling +~~~~~~~~~~~~~~ + +So what happens if one of the tasks raises an exception? + +This was not documented for some time and before version 3.1 +the exception value will be forwarded to the chord callback. + + +From 3.1 errors will propagate to the callback, so the callback will not be executed +instead the callback changes to failure state, and the error is set +to the :exc:`~@ChordError` exception: + +.. code-block:: python + + >>> c = chord([add.s(4, 4), raising_task.s(), add.s(8, 8)]) + >>> result = c() + >>> result.get() + Traceback (most recent call last): + File "", line 1, in + File "*/celery/result.py", line 120, in get + interval=interval) + File "*/celery/backends/amqp.py", line 150, in wait_for + raise self.exception_to_python(meta['result']) + celery.exceptions.ChordError: Dependency 97de6f3f-ea67-4517-a21c-d867c61fcb47 + raised ValueError('something something',) + +If you're running 3.0.14 or later you can enable the new behavior via +the :setting:`CELERY_CHORD_PROPAGATES` setting:: + + CELERY_CHORD_PROPAGATES = True + +While the traceback may be different depending on which result backend is +being used, you can see the error description includes the id of the task that failed +and a string representation of the original exception. You can also +find the original traceback in ``result.traceback``. + +Note that the rest of the tasks will still execute, so the third task +(``add.s(8, 8)``) is still executed even though the middle task failed. +Also the :exc:`~@ChordError` only shows the task that failed +first (in time): it does not respect the ordering of the header group. + +.. _chord-important-notes: + +Important Notes +~~~~~~~~~~~~~~~ + +Tasks used within a chord must *not* ignore their results. In practice this +means that you must enable a :const:`CELERY_RESULT_BACKEND` in order to use +chords. Additionally, if :const:`CELERY_IGNORE_RESULT` is set to :const:`True` +in your configuration, be sure that the individual tasks to be used within +the chord are defined with :const:`ignore_result=False`. This applies to both +Task subclasses and decorated tasks. + +Example Task subclass: + +.. code-block:: python + + class MyTask(Task): + abstract = True + ignore_result = False + + +Example decorated task: + +.. 
code-block:: python + + @app.task(ignore_result=False) + def another_task(project): + do_something() + +By default the synchronization step is implemented by having a recurring task +poll the completion of the group every second, calling the signature when +ready. + +Example implementation: + +.. code-block:: python + + from celery import maybe_signature + + @app.task(bind=True) + def unlock_chord(self, group, callback, interval=1, max_retries=None): + if group.ready(): + return maybe_signature(callback).delay(group.join()) + raise self.retry(countdown=interval, max_retries=max_retries) + + +This is used by all result backends except Redis and Memcached, which +increment a counter after each task in the header, then applying the callback +when the counter exceeds the number of tasks in the set. *Note:* chords do not +properly work with Redis before version 2.2; you will need to upgrade to at +least 2.2 to use them. + +The Redis and Memcached approach is a much better solution, but not easily +implemented in other backends (suggestions welcome!). + + +.. note:: + + If you are using chords with the Redis result backend and also overriding + the :meth:`Task.after_return` method, you need to make sure to call the + super method or else the chord callback will not be applied. + + .. code-block:: python + + def after_return(self, *args, **kwargs): + do_something() + super(MyTask, self).after_return(*args, **kwargs) + +.. _canvas-map: + +Map & Starmap +------------- + +:class:`~celery.map` and :class:`~celery.starmap` are built-in tasks +that calls the task for every element in a sequence. + +They differ from group in that + +- only one task message is sent + +- the operation is sequential. + +For example using ``map``: + +.. code-block:: python + + >>> from proj.tasks import add + + >>> ~xsum.map([range(10), range(100)]) + [45, 4950] + +is the same as having a task doing: + +.. code-block:: python + + @app.task + def temp(): + return [xsum(range(10)), xsum(range(100))] + +and using ``starmap``:: + + >>> ~add.starmap(zip(range(10), range(10))) + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] + +is the same as having a task doing: + +.. code-block:: python + + @app.task + def temp(): + return [add(i, i) for i in range(10)] + +Both ``map`` and ``starmap`` are signature objects, so they can be used as +other signatures and combined in groups etc., for example +to call the starmap after 10 seconds:: + + >>> add.starmap(zip(range(10), range(10))).apply_async(countdown=10) + +.. _canvas-chunks: + +Chunks +------ + +Chunking lets you divide an iterable of work into pieces, so that if +you have one million objects, you can create 10 tasks with hundred +thousand objects each. + +Some may worry that chunking your tasks results in a degradation +of parallelism, but this is rarely true for a busy cluster +and in practice since you are avoiding the overhead of messaging +it may considerably increase performance. + +To create a chunks signature you can use :meth:`@Task.chunks`: + +.. code-block:: python + + >>> add.chunks(zip(range(100), range(100)), 10) + +As with :class:`~celery.group` the act of sending the messages for +the chunks will happen in the current process when called: + +.. 
code-block:: python
+
+    >>> from proj.tasks import add
+
+    >>> res = add.chunks(zip(range(100), range(100)), 10)()
+    >>> res.get()
+    [[0, 2, 4, 6, 8, 10, 12, 14, 16, 18],
+     [20, 22, 24, 26, 28, 30, 32, 34, 36, 38],
+     [40, 42, 44, 46, 48, 50, 52, 54, 56, 58],
+     [60, 62, 64, 66, 68, 70, 72, 74, 76, 78],
+     [80, 82, 84, 86, 88, 90, 92, 94, 96, 98],
+     [100, 102, 104, 106, 108, 110, 112, 114, 116, 118],
+     [120, 122, 124, 126, 128, 130, 132, 134, 136, 138],
+     [140, 142, 144, 146, 148, 150, 152, 154, 156, 158],
+     [160, 162, 164, 166, 168, 170, 172, 174, 176, 178],
+     [180, 182, 184, 186, 188, 190, 192, 194, 196, 198]]
+
+while calling ``.apply_async`` will create a dedicated
+task so that the individual tasks are applied in a worker
+instead::
+
+    >>> add.chunks(zip(range(100), range(100)), 10).apply_async()
+
+You can also convert chunks to a group::
+
+    >>> group = add.chunks(zip(range(100), range(100)), 10).group()
+
+and with the group skew the countdown of each task by increments
+of one::
+
+    >>> group.skew(start=1, stop=10)()
+
+which means that the first task will have a countdown of 1, the second
+a countdown of 2 and so on.
diff --git a/docs/userguide/concurrency/eventlet.rst b/docs/userguide/concurrency/eventlet.rst
new file mode 100644
index 0000000..aec95fd
--- /dev/null
+++ b/docs/userguide/concurrency/eventlet.rst
@@ -0,0 +1,65 @@
+.. _concurrency-eventlet:
+
+===========================
+ Concurrency with Eventlet
+===========================
+
+.. _eventlet-introduction:
+
+Introduction
+============
+
+The `Eventlet`_ homepage describes it as:
+A concurrent networking library for Python that allows you to
+change how you run your code, not how you write it.
+
+    * It uses `epoll(4)`_ or `libevent`_ for
+      `highly scalable non-blocking I/O`_.
+    * `Coroutines`_ ensure that the developer uses a blocking style of
+      programming that is similar to threading, but provide the benefits of
+      non-blocking I/O.
+    * The event dispatch is implicit, which means you can easily use Eventlet
+      from the Python interpreter, or as a small part of a larger application.
+
+Celery supports Eventlet as an alternative execution pool implementation.
+It is in some cases superior to prefork, but you need to ensure
+your tasks do not perform blocking calls, as this will halt all
+other operations in the worker until the blocking call returns.
+
+The prefork pool can make use of multiple processes, but how many is
+often limited to a few processes per CPU.  With Eventlet you can efficiently
+spawn hundreds, or thousands, of green threads.  In an informal test with a
+feed hub system the Eventlet pool could fetch and process hundreds of feeds
+every second, while the prefork pool spent 14 seconds processing 100
+feeds.  Note that this is one of the applications that evented I/O is
+especially good at (asynchronous HTTP requests).  You may want a mix of both
+Eventlet and prefork workers, and route tasks according to compatibility or
+what works best.
+
+Enabling Eventlet
+=================
+
+You can enable the Eventlet pool by using the ``-P`` option to
+:program:`celery worker`:
+
+.. code-block:: bash
+
+    $ celery -A proj worker -P eventlet -c 1000
+
+.. _eventlet-examples:
+
+Examples
+========
+
+See the `Eventlet examples`_ directory in the Celery distribution for
+some examples making use of Eventlet support.
+
+.. _`Eventlet`: http://eventlet.net
+.. _`epoll(4)`: http://linux.die.net/man/4/epoll
+.. _`libevent`: http://monkey.org/~provos/libevent/
+.. 
_`highly scalable non-blocking I/O`: + http://en.wikipedia.org/wiki/Asynchronous_I/O#Select.28.2Fpoll.29_loops +.. _`Coroutines`: http://en.wikipedia.org/wiki/Coroutine +.. _`Eventlet examples`: + https://github.com/celery/celery/tree/master/examples/eventlet + diff --git a/docs/userguide/concurrency/index.rst b/docs/userguide/concurrency/index.rst new file mode 100644 index 0000000..4bdf54b --- /dev/null +++ b/docs/userguide/concurrency/index.rst @@ -0,0 +1,13 @@ +.. _concurrency: + +============= + Concurrency +============= + +:Release: |version| +:Date: |today| + +.. toctree:: + :maxdepth: 2 + + eventlet diff --git a/docs/userguide/extending.rst b/docs/userguide/extending.rst new file mode 100644 index 0000000..4187803 --- /dev/null +++ b/docs/userguide/extending.rst @@ -0,0 +1,801 @@ +.. _guide-extending: + +========================== + Extensions and Bootsteps +========================== + +.. contents:: + :local: + :depth: 2 + +.. _extending-custom-consumers: + +Custom Message Consumers +======================== + +You may want to embed custom Kombu consumers to manually process your messages. + +For that purpose a special :class:`~celery.bootstep.ConsumerStep` bootstep class +exists, where you only need to define the ``get_consumers`` method, which must +return a list of :class:`kombu.Consumer` objects to start +whenever the connection is established: + +.. code-block:: python + + from celery import Celery + from celery import bootsteps + from kombu import Consumer, Exchange, Queue + + my_queue = Queue('custom', Exchange('custom'), 'routing_key') + + app = Celery(broker='amqp://') + + + class MyConsumerStep(bootsteps.ConsumerStep): + + def get_consumers(self, channel): + return [Consumer(channel, + queues=[my_queue], + callbacks=[self.handle_message], + accept=['json'])] + + def handle_message(self, body, message): + print('Received message: {0!r}'.format(body)) + message.ack() + app.steps['consumer'].add(MyConsumerStep) + + def send_me_a_message(self, who='world!', producer=None): + with app.producer_or_acquire(producer) as producer: + producer.send( + {'hello': who}, + serializer='json', + exchange=my_queue.exchange, + routing_key='routing_key', + declare=[my_queue], + retry=True, + ) + + if __name__ == '__main__': + send_me_a_message('celery') + + +.. note:: + + Kombu Consumers can take use of two different message callback dispatching + mechanisms. The first one is the ``callbacks`` argument which accepts + a list of callbacks with a ``(body, message)`` signature, + the second one is the ``on_message`` argument which takes a single + callback with a ``(message, )`` signature. The latter will not + automatically decode and deserialize the payload which is useful + in many cases: + + .. code-block:: python + + def get_consumers(self, channel): + return [Consumer(channel, queues=[my_queue], + on_message=self.on_message)] + + + def on_message(self, message): + payload = message.decode() + print( + 'Received message: {0!r} {props!r} rawlen={s}'.format( + payload, props=message.properties, s=len(message.body), + )) + message.ack() + +.. _extending-blueprints: + +Blueprints +========== + +Bootsteps is a technique to add functionality to the workers. +A bootstep is a custom class that defines hooks to do custom actions +at different stages in the worker. 
Every bootstep belongs to a blueprint,
+and the worker currently defines two blueprints: **Worker** and **Consumer**.
+
+----------------------------------------------------------
+
+**Figure A:** Bootsteps in the Worker and Consumer blueprints.  Starting
+from the bottom up, the first step in the worker blueprint
+is the Timer, and the last step is to start the Consumer blueprint,
+which then establishes the broker connection and starts
+consuming messages.
+
+.. figure:: ../images/worker_graph_full.png
+
+----------------------------------------------------------
+
+
+Worker
+======
+
+The Worker is the first blueprint to start, and with it the major components
+like the event loop, processing pool, and the timer are started, along with
+optional components like the autoscaler.  When the worker is fully started
+it will continue to the Consumer blueprint.
+
+The :class:`~celery.worker.WorkController` is the core worker implementation,
+and contains several methods and attributes that you can use in your bootstep.
+
+Attributes
+----------
+
+.. attribute:: app
+
+    The current app instance.
+
+.. attribute:: hostname
+
+    The workers node name (e.g. `worker1@example.com`)
+
+.. attribute:: blueprint
+
+    This is the worker :class:`~celery.bootsteps.Blueprint`.
+
+.. attribute:: hub
+
+    Event loop object (:class:`~kombu.async.Hub`).  You can use
+    this to register callbacks in the event loop.
+
+    This is only supported by async I/O enabled transports (amqp, redis),
+    in which case the `worker.use_eventloop` attribute should be set.
+
+    Your worker bootstep must require the Hub bootstep to use this:
+
+    .. code-block:: python
+
+        class WorkerStep(bootsteps.StartStopStep):
+            requires = ('celery.worker.components:Hub', )
+
+.. attribute:: pool
+
+    The current process/eventlet/gevent/thread pool.
+    See :class:`celery.concurrency.base.BasePool`.
+
+    Your worker bootstep must require the Pool bootstep to use this:
+
+    .. code-block:: python
+
+        class WorkerStep(bootsteps.StartStopStep):
+            requires = ('celery.worker.components:Pool', )
+
+.. attribute:: timer
+
+    :class:`~kombu.async.timer.Timer` used to schedule functions.
+
+    Your worker bootstep must require the Timer bootstep to use this:
+
+    .. code-block:: python
+
+        class WorkerStep(bootsteps.StartStopStep):
+            requires = ('celery.worker.components:Timer', )
+
+.. attribute:: statedb
+
+    :class:`Database <celery.worker.state.Persistent>` used to persist state
+    between worker restarts.
+
+    This is only defined if the ``statedb`` argument is enabled.
+
+    Your worker bootstep must require the Statedb bootstep to use this:
+
+    .. code-block:: python
+
+        class WorkerStep(bootsteps.StartStopStep):
+            requires = ('celery.worker.components:Statedb', )
+
+.. attribute:: autoscaler
+
+    :class:`~celery.worker.autoscaler.Autoscaler` used to automatically grow
+    and shrink the number of processes in the pool.
+
+    This is only defined if the ``autoscale`` argument is enabled.
+
+    Your worker bootstep must require the `Autoscaler` bootstep to use this:
+
+    .. code-block:: python
+
+        class WorkerStep(bootsteps.StartStopStep):
+            requires = ('celery.worker.autoscaler:Autoscaler', )
+
+.. attribute:: autoreloader
+
+    :class:`~celery.worker.autoreloader.Autoreloader` used to automatically
+    reload user code when the filesystem changes.
+
+    This is only defined if the ``autoreload`` argument is enabled.
+    Your worker bootstep must require the `Autoreloader` bootstep to use this:
+
+.. 
code-block:: python + + class WorkerStep(bootsteps.StartStopStep): + requires = ('celery.worker.autoreloader:Autoreloader', ) + +An example Worker bootstep could be: + +.. code-block:: python + + from celery import bootsteps + + class ExampleWorkerStep(bootsteps.StartStopStep): + requires = ('Pool', ) + + def __init__(self, worker, **kwargs): + print('Called when the WorkController instance is constructed') + print('Arguments to WorkController: {0!r}'.format(kwargs)) + + def create(self, worker): + # this method can be used to delegate the action methods + # to another object that implements ``start`` and ``stop``. + return self + + def start(self, worker): + print('Called when the worker is started.') + + def stop(self, worker): + print("Called when the worker shuts down.") + + def terminate(self, worker): + print("Called when the worker terminates") + + +Every method is passed the current ``WorkController`` instance as the first +argument. + + +Another example could use the timer to wake up at regular intervals: + +.. code-block:: python + + from celery import bootsteps + + + class DeadlockDetection(bootsteps.StartStopStep): + requires = ('Timer', ) + + def __init__(self, worker, deadlock_timeout=3600): + self.timeout = deadlock_timeout + self.requests = [] + self.tref = None + + def start(self, worker): + # run every 30 seconds. + self.tref = worker.timer.call_repeatedly( + 30.0, self.detect, (worker, ), priority=10, + ) + + def stop(self, worker): + if self.tref: + self.tref.cancel() + self.tref = None + + def detect(self, worker): + # update active requests + for req in self.worker.active_requests: + if req.time_start and time() - req.time_start > self.timeout: + raise SystemExit() + +Consumer +======== + +The Consumer blueprint establishes a connection to the broker, and +is restarted every time this connection is lost. Consumer bootsteps +include the worker heartbeat, the remote control command consumer, and +importantly, the task consumer. + +When you create consumer bootsteps you must take into account that it must +be possible to restart your blueprint. An additional 'shutdown' method is +defined for consumer bootsteps, this method is called when the worker is +shutdown. + +Attributes +---------- + +.. attribute:: app + + The current app instance. + +.. attribute:: controller + + The parent :class:`~@WorkController` object that created this consumer. + +.. attribute:: hostname + + The workers node name (e.g. `worker1@example.com`) + +.. attribute:: blueprint + + This is the worker :class:`~celery.bootsteps.Blueprint`. + +.. attribute:: hub + + Event loop object (:class:`~kombu.async.Hub`). You can use + this to register callbacks in the event loop. + + This is only supported by async I/O enabled transports (amqp, redis), + in which case the `worker.use_eventloop` attribute should be set. + + Your worker bootstep must require the Hub bootstep to use this: + + .. code-block:: python + + class WorkerStep(bootsteps.StartStopStep): + requires = ('celery.worker:Hub', ) + + +.. attribute:: connection + + The current broker connection (:class:`kombu.Connection`). + + A consumer bootstep must require the 'Connection' bootstep + to use this: + + .. code-block:: python + + class Step(bootsteps.StartStopStep): + requires = ('celery.worker.consumer:Connection', ) + +.. attribute:: event_dispatcher + + A :class:`@events.Dispatcher` object that can be used to send events. + + A consumer bootstep must require the `Events` bootstep to use this. + + .. 
code-block:: python + + class Step(bootsteps.StartStopStep): + requires = ('celery.worker.consumer:Events', ) + +.. attribute:: gossip + + Worker to worker broadcast communication + (class:`~celery.worker.consumer.Gossip`). + + A consumer bootstep must require the `Gossip` bootstep to use this. + + .. code-block:: python + + class Step(bootsteps.StartStopStep): + requires = ('celery.worker.consumer:Events', ) + +.. attribute:: pool + + The current process/eventlet/gevent/thread pool. + See :class:`celery.concurrency.base.BasePool`. + +.. attribute:: timer + + :class:`Timer >> app = Celery() + >>> app.steps['worker'].add(MyWorkerStep) # < add class, do not instantiate + >>> app.steps['consumer'].add(MyConsumerStep) + + >>> app.steps['consumer'].update([StepA, StepB]) + + >>> app.steps['consumer'] + {step:proj.StepB{()}, step:proj.MyConsumerStep{()}, step:proj.StepA{()} + +The order of steps is not important here as the order is decided by the +resulting dependency graph (``Step.requires``). + +To illustrate how you can install bootsteps and how they work, this is an example step that +prints some useless debugging information. +It can be added both as a worker and consumer bootstep: + + +.. code-block:: python + + from celery import Celery + from celery import bootsteps + + class InfoStep(bootsteps.Step): + + def __init__(self, parent, **kwargs): + # here we can prepare the Worker/Consumer object + # in any way we want, set attribute defaults and so on. + print('{0!r} is in init'.format(parent)) + + def start(self, parent): + # our step is started together with all other Worker/Consumer + # bootsteps. + print('{0!r} is starting'.format(parent)) + + def stop(self, parent): + # the Consumer calls stop every time the consumer is restarted + # (i.e. connection is lost) and also at shutdown. The Worker + # will call stop at shutdown only. + print('{0!r} is stopping'.format(parent)) + + def shutdown(self, parent): + # shutdown is called by the Consumer at shutdown, it's not + # called by Worker. + print('{0!r} is shutting down'.format(parent)) + + app = Celery(broker='amqp://') + app.steps['worker'].add(InfoStep) + app.steps['consumer'].add(InfoStep) + +Starting the worker with this step installed will give us the following +logs:: + + is in init + is in init + [2013-05-29 16:18:20,544: WARNING/MainProcess] + is starting + [2013-05-29 16:18:21,577: WARNING/MainProcess] + is starting + is stopping + is stopping + is shutting down + +The ``print`` statements will be redirected to the logging subsystem after +the worker has been initialized, so the "is starting" lines are timestamped. +You may notice that this does no longer happen at shutdown, this is because +the ``stop`` and ``shutdown`` methods are called inside a *signal handler*, +and it's not safe to use logging inside such a handler. +Logging with the Python logging module is not :term:`reentrant`, +which means that you cannot interrupt the function and +call it again later. It's important that the ``stop`` and ``shutdown`` methods +you write is also :term:`reentrant`. + +Starting the worker with ``--loglevel=debug`` will show us more +information about the boot process:: + + [2013-05-29 16:18:20,509: DEBUG/MainProcess] | Worker: Preparing bootsteps. + [2013-05-29 16:18:20,511: DEBUG/MainProcess] | Worker: Building graph... 
+ is in init + [2013-05-29 16:18:20,511: DEBUG/MainProcess] | Worker: New boot order: + {Hub, Queues (intra), Pool, Autoreloader, Timer, StateDB, + Autoscaler, InfoStep, Beat, Consumer} + [2013-05-29 16:18:20,514: DEBUG/MainProcess] | Consumer: Preparing bootsteps. + [2013-05-29 16:18:20,514: DEBUG/MainProcess] | Consumer: Building graph... + is in init + [2013-05-29 16:18:20,515: DEBUG/MainProcess] | Consumer: New boot order: + {Connection, Mingle, Events, Gossip, InfoStep, Agent, + Heart, Control, Tasks, event loop} + [2013-05-29 16:18:20,522: DEBUG/MainProcess] | Worker: Starting Hub + [2013-05-29 16:18:20,522: DEBUG/MainProcess] ^-- substep ok + [2013-05-29 16:18:20,522: DEBUG/MainProcess] | Worker: Starting Pool + [2013-05-29 16:18:20,542: DEBUG/MainProcess] ^-- substep ok + [2013-05-29 16:18:20,543: DEBUG/MainProcess] | Worker: Starting InfoStep + [2013-05-29 16:18:20,544: WARNING/MainProcess] + is starting + [2013-05-29 16:18:20,544: DEBUG/MainProcess] ^-- substep ok + [2013-05-29 16:18:20,544: DEBUG/MainProcess] | Worker: Starting Consumer + [2013-05-29 16:18:20,544: DEBUG/MainProcess] | Consumer: Starting Connection + [2013-05-29 16:18:20,559: INFO/MainProcess] Connected to amqp://guest@127.0.0.1:5672// + [2013-05-29 16:18:20,560: DEBUG/MainProcess] ^-- substep ok + [2013-05-29 16:18:20,560: DEBUG/MainProcess] | Consumer: Starting Mingle + [2013-05-29 16:18:20,560: INFO/MainProcess] mingle: searching for neighbors + [2013-05-29 16:18:21,570: INFO/MainProcess] mingle: no one here + [2013-05-29 16:18:21,570: DEBUG/MainProcess] ^-- substep ok + [2013-05-29 16:18:21,571: DEBUG/MainProcess] | Consumer: Starting Events + [2013-05-29 16:18:21,572: DEBUG/MainProcess] ^-- substep ok + [2013-05-29 16:18:21,572: DEBUG/MainProcess] | Consumer: Starting Gossip + [2013-05-29 16:18:21,577: DEBUG/MainProcess] ^-- substep ok + [2013-05-29 16:18:21,577: DEBUG/MainProcess] | Consumer: Starting InfoStep + [2013-05-29 16:18:21,577: WARNING/MainProcess] + is starting + [2013-05-29 16:18:21,578: DEBUG/MainProcess] ^-- substep ok + [2013-05-29 16:18:21,578: DEBUG/MainProcess] | Consumer: Starting Heart + [2013-05-29 16:18:21,579: DEBUG/MainProcess] ^-- substep ok + [2013-05-29 16:18:21,579: DEBUG/MainProcess] | Consumer: Starting Control + [2013-05-29 16:18:21,583: DEBUG/MainProcess] ^-- substep ok + [2013-05-29 16:18:21,583: DEBUG/MainProcess] | Consumer: Starting Tasks + [2013-05-29 16:18:21,606: DEBUG/MainProcess] basic.qos: prefetch_count->80 + [2013-05-29 16:18:21,606: DEBUG/MainProcess] ^-- substep ok + [2013-05-29 16:18:21,606: DEBUG/MainProcess] | Consumer: Starting event loop + [2013-05-29 16:18:21,608: WARNING/MainProcess] celery@example.com ready. + + +.. _extending-programs: + +Command-line programs +===================== + +.. _extending-commandoptions: + +Adding new command-line options +------------------------------- + +.. _extending-command-options: + +Command-specific options +~~~~~~~~~~~~~~~~~~~~~~~~ + +You can add additional command-line options to the ``worker``, ``beat`` and +``events`` commands by modifying the :attr:`~@Celery.user_options` attribute of the +application instance. + +Celery commands uses the :mod:`optparse` module to parse command-line +arguments, and so you have to use :mod:`optparse` specific option instances created +using :func:`optparse.make_option`. Please see the :mod:`optparse` +documentation to read about the fields supported. + +Example adding a custom option to the :program:`celery worker` command: + +.. 
code-block:: python + + from celery import Celery + from celery.bin import Option # <-- alias to optparse.make_option + + app = Celery(broker='amqp://') + + app.user_options['worker'].add( + Option('--enable-my-option', action='store_true', default=False, + help='Enable custom option.'), + ) + + +All bootsteps will now receive this argument as a keyword argument to +``Bootstep.__init__``: + +.. code-block:: python + + from celery import bootsteps + + class MyBootstep(bootsteps.Step): + + def __init__(self, worker, enable_my_option=False, **options): + if enable_my_option: + party() + + app.steps['worker'].add(MyBootstep) + +.. _extending-preload_options: + +Preload options +~~~~~~~~~~~~~~~ + +The :program:`celery` umbrella command supports the concept of 'preload +options', which are special options passed to all subcommands and parsed +outside of the main parsing step. + +The list of default preload options can be found in the API reference: +:mod:`celery.bin.base`. + +You can add new preload options too, e.g. to specify a configuration template: + +.. code-block:: python + + from celery import Celery + from celery import signals + from celery.bin import Option + + app = Celery() + app.user_options['preload'].add( + Option('-Z', '--template', default='default', + help='Configuration template to use.'), + ) + + @signals.user_preload_options.connect + def on_preload_parsed(options, **kwargs): + use_template(options['template']) + +.. _extending-subcommands: + +Adding new :program:`celery` sub-commands +----------------------------------------- + +New commands can be added to the :program:`celery` umbrella command by using +`setuptools entry-points`_. + +.. _`setuptools entry-points`: + http://reinout.vanrees.org/weblog/2010/01/06/zest-releaser-entry-points.html + + +Entry-points is special metadata that can be added to your packages ``setup.py`` program, +and then after installation, read from the system using the :mod:`pkg_resources` module. + +Celery recognizes ``celery.commands`` entry-points to install additional +subcommands, where the value of the entry-point must point to a valid subclass +of :class:`celery.bin.base.Command`. There is limited documentation, +unfortunately, but you can find inspiration from the various commands in the +:mod:`celery.bin` package. + +This is how the Flower_ monitoring extension adds the :program:`celery flower` command, +by adding an entry-point in :file:`setup.py`: + +.. code-block:: python + + setup( + name='flower', + entry_points={ + 'celery.commands': [ + 'flower = flower.command.FlowerCommand', + ], + } + ) + + +.. _Flower: http://pypi.python.org/pypi/flower + +The command definition is in two parts separated by the equal sign, where the +first part is the name of the subcommand (flower), then the fully qualified +module path to the class that implements the command +(``flower.command.FlowerCommand``). + + +In the module :file:`flower/command.py`, the command class is defined +something like this: + +.. code-block:: python + + from celery.bin.base import Command, Option + + + class FlowerCommand(Command): + + def get_options(self): + return ( + Option('--port', default=8888, type='int', + help='Webserver port', + ), + Option('--debug', action='store_true'), + ) + + def run(self, port=None, debug=False, **kwargs): + print('Running our command') + + +Worker API +========== + + +:class:`~kombu.async.Hub` - The workers async event loop. +--------------------------------------------------------- +:supported transports: amqp, redis + +.. 
versionadded:: 3.0 + +The worker uses asynchronous I/O when the amqp or redis broker transports are +used. The eventual goal is for all transports to use the eventloop, but that +will take some time so other transports still use a threading-based solution. + +.. method:: hub.add(fd, callback, flags) + + +.. method:: hub.add_reader(fd, callback, \*args) + + Add callback to be called when ``fd`` is readable. + + The callback will stay registered until explictly removed using + :meth:`hub.remove(fd) `, or the fd is automatically discarded + because it's no longer valid. + + Note that only one callback can be registered for any given fd at a time, + so calling ``add`` a second time will remove any callback that + was previously registered for that fd. + + A file descriptor is any file-like object that supports the ``fileno`` + method, or it can be the file descriptor number (int). + +.. method:: hub.add_writer(fd, callback, \*args) + + Add callback to be called when ``fd`` is writable. + See also notes for :meth:`hub.add_reader` above. + +.. method:: hub.remove(fd) + + Remove all callbacks for ``fd`` from the loop. + +Timer - Scheduling events +------------------------- + +.. method:: timer.call_after(secs, callback, args=(), kwargs=(), + priority=0) + +.. method:: timer.call_repeatedly(secs, callback, args=(), kwargs=(), + priority=0) + +.. method:: timer.call_at(eta, callback, args=(), kwargs=(), + priority=0) diff --git a/docs/userguide/index.rst b/docs/userguide/index.rst new file mode 100644 index 0000000..83ca54e --- /dev/null +++ b/docs/userguide/index.rst @@ -0,0 +1,26 @@ +.. _guide: + +============ + User Guide +============ + +:Release: |version| +:Date: |today| + +.. toctree:: + :maxdepth: 1 + + application + tasks + calling + canvas + workers + periodic-tasks + remote-tasks + routing + monitoring + security + optimizing + concurrency/index + signals + extending diff --git a/docs/userguide/monitoring.rst b/docs/userguide/monitoring.rst new file mode 100644 index 0000000..6b85ae3 --- /dev/null +++ b/docs/userguide/monitoring.rst @@ -0,0 +1,747 @@ +.. _guide-monitoring: + +================================= + Monitoring and Management Guide +================================= + +.. contents:: + :local: + +Introduction +============ + +There are several tools available to monitor and inspect Celery clusters. + +This document describes some of these, as as well as +features related to monitoring, like events and broadcast commands. + +.. _monitoring-workers: + +Workers +======= + +.. _monitoring-control: + +Management Command-line Utilities (``inspect``/``control``) +----------------------------------------------------------- + + +:program:`celery` can also be used to inspect +and manage worker nodes (and to some degree tasks). + +To list all the commands available do: + +.. code-block:: bash + + $ celery help + +or to get help for a specific command do: + +.. code-block:: bash + + $ celery --help + +Commands +~~~~~~~~ + +* **shell**: Drop into a Python shell. + + The locals will include the ``celery`` variable, which is the current app. + Also all known tasks will be automatically added to locals (unless the + ``--without-tasks`` flag is set). + + Uses Ipython, bpython, or regular python in that order if installed. + You can force an implementation using ``--force-ipython|-I``, + ``--force-bpython|-B``, or ``--force-python|-P``. + +* **status**: List active nodes in this cluster + + .. code-block:: bash + + $ celery -A proj status + +* **result**: Show the result of a task + + .. 
code-block:: bash + + $ celery -A proj result -t tasks.add 4e196aa4-0141-4601-8138-7aa33db0f577 + + Note that you can omit the name of the task as long as the + task doesn't use a custom result backend. + +* **purge**: Purge messages from all configured task queues. + + .. warning:: + There is no undo for this operation, and messages will + be permanently deleted! + + .. code-block:: bash + + $ celery -A proj purge + + +* **inspect active**: List active tasks + + .. code-block:: bash + + $ celery -A proj inspect active + + These are all the tasks that are currently being executed. + +* **inspect scheduled**: List scheduled ETA tasks + + .. code-block:: bash + + $ celery -A proj inspect scheduled + + These are tasks reserved by the worker because they have the + `eta` or `countdown` argument set. + +* **inspect reserved**: List reserved tasks + + .. code-block:: bash + + $ celery -A proj inspect reserved + + This will list all tasks that have been prefetched by the worker, + and is currently waiting to be executed (does not include tasks + with an eta). + +* **inspect revoked**: List history of revoked tasks + + .. code-block:: bash + + $ celery -A proj inspect revoked + +* **inspect registered**: List registered tasks + + .. code-block:: bash + + $ celery -A proj inspect registered + +* **inspect stats**: Show worker statistics (see :ref:`worker-statistics`) + + .. code-block:: bash + + $ celery -A proj inspect stats + +* **control enable_events**: Enable events + + .. code-block:: bash + + $ celery -A proj control enable_events + +* **control disable_events**: Disable events + + .. code-block:: bash + + $ celery -A proj control disable_events + +* **migrate**: Migrate tasks from one broker to another (**EXPERIMENTAL**). + + .. code-block:: bash + + $ celery -A proj migrate redis://localhost amqp://localhost + + This command will migrate all the tasks on one broker to another. + As this command is new and experimental you should be sure to have + a backup of the data before proceeding. + +.. note:: + + All ``inspect`` and ``control`` commands supports a ``--timeout`` argument, + This is the number of seconds to wait for responses. + You may have to increase this timeout if you're not getting a response + due to latency. + +.. _inspect-destination: + +Specifying destination nodes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default the inspect and control commands operates on all workers. +You can specify a single, or a list of workers by using the +`--destination` argument: + +.. code-block:: bash + + $ celery -A proj inspect -d w1,w2 reserved + + $ celery -A proj control -d w1,w2 enable_events + + +.. _monitoring-flower: + +Flower: Real-time Celery web-monitor +------------------------------------ + +Flower is a real-time web based monitor and administration tool for Celery. +It is under active development, but is already an essential tool. +Being the recommended monitor for Celery, it obsoletes the Django-Admin +monitor, celerymon and the ncurses based monitor. + +Flower is pronounced like "flow", but you can also use the botanical version +if you prefer. + +Features +~~~~~~~~ + +- Real-time monitoring using Celery Events + + - Task progress and history. + - Ability to show task details (arguments, start time, runtime, and more) + - Graphs and statistics + +- Remote Control + + - View worker status and statistics. + - Shutdown and restart worker instances. + - Control worker pool size and autoscale settings. + - View and modify the queues a worker instance consumes from. 
+ - View currently running tasks + - View scheduled tasks (ETA/countdown) + - View reserved and revoked tasks + - Apply time and rate limits + - Configuration viewer + - Revoke or terminate tasks + +- HTTP API +- OpenID authentication + +**Screenshots** + +.. figure:: ../images/dashboard.png + :width: 700px + +.. figure:: ../images/monitor.png + :width: 700px + +More screenshots_: + +.. _screenshots: https://github.com/mher/flower/tree/master/docs/screenshots + +Usage +~~~~~ + +You can use pip to install Flower: + +.. code-block:: bash + + $ pip install flower + +Running the flower command will start a web-server that you can visit: + +.. code-block:: bash + + $ celery -A proj flower + +The default port is http://localhost:5555, but you can change this using the `--port` argument: + +.. code-block:: bash + + $ celery -A proj flower --port=5555 + +Broker URL can also be passed through the `--broker` argument : + +.. code-block:: bash + + $ celery flower --broker=amqp://guest:guest@localhost:5672// + or + $ celery flower --broker=redis://guest:guest@localhost:6379/0 + +Then, you can visit flower in your web browser : + +.. code-block:: bash + + $ open http://localhost:5555 + + + +.. _monitoring-celeryev: + +celery events: Curses Monitor +----------------------------- + +.. versionadded:: 2.0 + +`celery events` is a simple curses monitor displaying +task and worker history. You can inspect the result and traceback of tasks, +and it also supports some management commands like rate limiting and shutting +down workers. This monitor was started as a proof of concept, and you +probably want to use Flower instead. + +Starting: + +.. code-block:: bash + + $ celery -A proj events + +You should see a screen like: + +.. figure:: ../images/celeryevshotsm.jpg + + +`celery events` is also used to start snapshot cameras (see +:ref:`monitoring-snapshots`: + +.. code-block:: bash + + $ celery -A proj events --camera= --frequency=1.0 + +and it includes a tool to dump events to :file:`stdout`: + +.. code-block:: bash + + $ celery -A proj events --dump + +For a complete list of options use ``--help``: + +.. code-block:: bash + + $ celery events --help + +.. _`celerymon`: http://github.com/celery/celerymon/ + +.. _monitoring-rabbitmq: + +RabbitMQ +======== + +To manage a Celery cluster it is important to know how +RabbitMQ can be monitored. + +RabbitMQ ships with the `rabbitmqctl(1)`_ command, +with this you can list queues, exchanges, bindings, +queue lengths, the memory usage of each queue, as well +as manage users, virtual hosts and their permissions. + +.. note:: + + The default virtual host (``"/"``) is used in these + examples, if you use a custom virtual host you have to add + the ``-p`` argument to the command, e.g: + ``rabbitmqctl list_queues -p my_vhost …`` + +.. _`rabbitmqctl(1)`: http://www.rabbitmq.com/man/rabbitmqctl.1.man.html + +.. _monitoring-rmq-queues: + +Inspecting queues +----------------- + +Finding the number of tasks in a queue: + +.. code-block:: bash + + $ rabbitmqctl list_queues name messages messages_ready \ + messages_unacknowledged + + +Here `messages_ready` is the number of messages ready +for delivery (sent but not received), `messages_unacknowledged` +is the number of messages that has been received by a worker but +not acknowledged yet (meaning it is in progress, or has been reserved). +`messages` is the sum of ready and unacknowledged messages. + + +Finding the number of workers currently consuming from a queue: + +.. 
code-block:: bash + + $ rabbitmqctl list_queues name consumers + +Finding the amount of memory allocated to a queue: + +.. code-block:: bash + + $ rabbitmqctl list_queues name memory + +:Tip: Adding the ``-q`` option to `rabbitmqctl(1)`_ makes the output + easier to parse. + + +.. _monitoring-redis: + +Redis +===== + +If you're using Redis as the broker, you can monitor the Celery cluster using +the `redis-cli(1)` command to list lengths of queues. + +.. _monitoring-redis-queues: + +Inspecting queues +----------------- + +Finding the number of tasks in a queue: + +.. code-block:: bash + + $ redis-cli -h HOST -p PORT -n DATABASE_NUMBER llen QUEUE_NAME + +The default queue is named `celery`. To get all available queues, invoke: + +.. code-block:: bash + + $ redis-cli -h HOST -p PORT -n DATABASE_NUMBER keys \* + +.. note:: + + Queue keys only exists when there are tasks in them, so if a key + does not exist it simply means there are no messages in that queue. + This is because in Redis a list with no elements in it is automatically + removed, and hence it won't show up in the `keys` command output, + and `llen` for that list returns 0. + + Also, if you're using Redis for other purposes, the + output of the `keys` command will include unrelated values stored in + the database. The recommended way around this is to use a + dedicated `DATABASE_NUMBER` for Celery, you can also use + database numbers to separate Celery applications from each other (virtual + hosts), but this will not affect the monitoring events used by e.g. Flower + as Redis pub/sub commands are global rather than database based. + +.. _monitoring-munin: + +Munin +===== + +This is a list of known Munin plug-ins that can be useful when +maintaining a Celery cluster. + +* rabbitmq-munin: Munin plug-ins for RabbitMQ. + + http://github.com/ask/rabbitmq-munin + +* celery_tasks: Monitors the number of times each task type has + been executed (requires `celerymon`). + + http://exchange.munin-monitoring.org/plugins/celery_tasks-2/details + +* celery_task_states: Monitors the number of tasks in each state + (requires `celerymon`). + + http://exchange.munin-monitoring.org/plugins/celery_tasks/details + + +.. _monitoring-events: + +Events +====== + +The worker has the ability to send a message whenever some event +happens. These events are then captured by tools like Flower, +and :program:`celery events` to monitor the cluster. + +.. _monitoring-snapshots: + +Snapshots +--------- + +.. versionadded:: 2.1 + +Even a single worker can produce a huge amount of events, so storing +the history of all events on disk may be very expensive. + +A sequence of events describes the cluster state in that time period, +by taking periodic snapshots of this state you can keep all history, but +still only periodically write it to disk. + +To take snapshots you need a Camera class, with this you can define +what should happen every time the state is captured; You can +write it to a database, send it by email or something else entirely. + +:program:`celery events` is then used to take snapshots with the camera, +for example if you want to capture state every 2 seconds using the +camera ``myapp.Camera`` you run :program:`celery events` with the following +arguments: + +.. code-block:: bash + + $ celery -A proj events -c myapp.Camera --frequency=2.0 + + +.. _monitoring-camera: + +Custom Camera +~~~~~~~~~~~~~ + +Cameras can be useful if you need to capture events and do something +with those events at an interval. 
For real-time event processing +you should use :class:`@events.Receiver` directly, like in +:ref:`event-real-time-example`. + +Here is an example camera, dumping the snapshot to screen: + +.. code-block:: python + + from pprint import pformat + + from celery.events.snapshot import Polaroid + + class DumpCam(Polaroid): + + def on_shutter(self, state): + if not state.event_count: + # No new events since last snapshot. + return + print('Workers: {0}'.format(pformat(state.workers, indent=4))) + print('Tasks: {0}'.format(pformat(state.tasks, indent=4))) + print('Total: {0.event_count} events, %s {0.task_count}'.format( + state)) + +See the API reference for :mod:`celery.events.state` to read more +about state objects. + +Now you can use this cam with :program:`celery events` by specifying +it with the :option:`-c` option: + +.. code-block:: bash + + $ celery -A proj events -c myapp.DumpCam --frequency=2.0 + +Or you can use it programmatically like this: + +.. code-block:: python + + from celery import Celery + from myapp import DumpCam + + def main(app, freq=1.0): + state = app.events.State() + with app.connection() as connection: + recv = app.events.Receiver(connection, handlers={'*': state.event}) + with DumpCam(state, freq=freq): + recv.capture(limit=None, timeout=None) + + if __name__ == '__main__': + app = Celery(broker='amqp://guest@localhost//') + main(app) + +.. _event-real-time-example: + +Real-time processing +-------------------- + +To process events in real-time you need the following + +- An event consumer (this is the ``Receiver``) + +- A set of handlers called when events come in. + + You can have different handlers for each event type, + or a catch-all handler can be used ('*') + +- State (optional) + + :class:`@events.State` is a convenient in-memory representation + of tasks and workers in the cluster that is updated as events come in. + + It encapsulates solutions for many common things, like checking if a + worker is still alive (by verifying heartbeats), merging event fields + together as events come in, making sure timestamps are in sync, and so on. + + +Combining these you can easily process events in real-time: + + +.. code-block:: python + + from celery import Celery + + + def my_monitor(app): + state = app.events.State() + + def announce_failed_tasks(event): + state.event(event) + # task name is sent only with -received event, and state + # will keep track of this for us. + task = state.tasks.get(event['uuid']) + + print('TASK FAILED: %s[%s] %s' % ( + task.name, task.uuid, task.info(), )) + + with app.connection() as connection: + recv = app.events.Receiver(connection, handlers={ + 'task-failed': announce_failed_tasks, + '*': state.event, + }) + recv.capture(limit=None, timeout=None, wakeup=True) + + if __name__ == '__main__': + app = Celery(broker='amqp://guest@localhost//') + my_monitor(app) + +.. note:: + + The wakeup argument to ``capture`` sends a signal to all workers + to force them to send a heartbeat. This way you can immediately see + workers when the monitor starts. + + +You can listen to specific events by specifying the handlers: + +.. code-block:: python + + from celery import Celery + + def my_monitor(app): + state = app.events.State() + + def announce_failed_tasks(event): + state.event(event) + # task name is sent only with -received event, and state + # will keep track of this for us. 
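+            # note: unlike the previous example no catch-all ('*') handler
+            # is registered below, so only 'task-failed' events reach the
+            # state here and the task name may therefore be unknown.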
+ task = state.tasks.get(event['uuid']) + + print('TASK FAILED: %s[%s] %s' % ( + task.name, task.uuid, task.info(), )) + + with app.connection() as connection: + recv = app.events.Receiver(connection, handlers={ + 'task-failed': announce_failed_tasks, + }) + recv.capture(limit=None, timeout=None, wakeup=True) + + if __name__ == '__main__': + app = Celery(broker='amqp://guest@localhost//') + my_monitor(app) + +.. _event-reference: + +Event Reference +=============== + +This list contains the events sent by the worker, and their arguments. + +.. _event-reference-task: + +Task Events +----------- + +.. event:: task-sent + +task-sent +~~~~~~~~~ + +:signature: ``task-sent(uuid, name, args, kwargs, retries, eta, expires, + queue, exchange, routing_key)`` + +Sent when a task message is published and +the :setting:`CELERY_SEND_TASK_SENT_EVENT` setting is enabled. + +.. event:: task-received + +task-received +~~~~~~~~~~~~~ + +:signature: ``task-received(uuid, name, args, kwargs, retries, eta, hostname, + timestamp)`` + +Sent when the worker receives a task. + +.. event:: task-started + +task-started +~~~~~~~~~~~~ + +:signature: ``task-started(uuid, hostname, timestamp, pid)`` + +Sent just before the worker executes the task. + +.. event:: task-succeeded + +task-succeeded +~~~~~~~~~~~~~~ + +:signature: ``task-succeeded(uuid, result, runtime, hostname, timestamp)`` + +Sent if the task executed successfully. + +Runtime is the time it took to execute the task using the pool. +(Starting from the task is sent to the worker pool, and ending when the +pool result handler callback is called). + +.. event:: task-failed + +task-failed +~~~~~~~~~~~ + +:signature: ``task-failed(uuid, exception, traceback, hostname, timestamp)`` + +Sent if the execution of the task failed. + +.. event:: task-revoked + +task-revoked +~~~~~~~~~~~~ + +:signature: ``task-revoked(uuid, terminated, signum, expired)`` + +Sent if the task has been revoked (Note that this is likely +to be sent by more than one worker). + +- ``terminated`` is set to true if the task process was terminated, + and the ``signum`` field set to the signal used. + +- ``expired`` is set to true if the task expired. + +.. event:: task-retried + +task-retried +~~~~~~~~~~~~ + +:signature: ``task-retried(uuid, exception, traceback, hostname, timestamp)`` + +Sent if the task failed, but will be retried in the future. + +.. _event-reference-worker: + +Worker Events +------------- + +.. event:: worker-online + +worker-online +~~~~~~~~~~~~~ + +:signature: ``worker-online(hostname, timestamp, freq, sw_ident, sw_ver, sw_sys)`` + +The worker has connected to the broker and is online. + +- `hostname`: Hostname of the worker. +- `timestamp`: Event timestamp. +- `freq`: Heartbeat frequency in seconds (float). +- `sw_ident`: Name of worker software (e.g. ``py-celery``). +- `sw_ver`: Software version (e.g. 2.2.0). +- `sw_sys`: Operating System (e.g. Linux, Windows, Darwin). + +.. event:: worker-heartbeat + +worker-heartbeat +~~~~~~~~~~~~~~~~ + +:signature: ``worker-heartbeat(hostname, timestamp, freq, sw_ident, sw_ver, sw_sys, + active, processed)`` + +Sent every minute, if the worker has not sent a heartbeat in 2 minutes, +it is considered to be offline. + +- `hostname`: Hostname of the worker. +- `timestamp`: Event timestamp. +- `freq`: Heartbeat frequency in seconds (float). +- `sw_ident`: Name of worker software (e.g. ``py-celery``). +- `sw_ver`: Software version (e.g. 2.2.0). +- `sw_sys`: Operating System (e.g. Linux, Windows, Darwin). 
+- `active`: Number of currently executing tasks. +- `processed`: Total number of tasks processed by this worker. + +.. event:: worker-offline + +worker-offline +~~~~~~~~~~~~~~ + +:signature: ``worker-offline(hostname, timestamp, freq, sw_ident, sw_ver, sw_sys)`` + +The worker has disconnected from the broker. diff --git a/docs/userguide/optimizing.rst b/docs/userguide/optimizing.rst new file mode 100644 index 0000000..459069f --- /dev/null +++ b/docs/userguide/optimizing.rst @@ -0,0 +1,227 @@ +.. _guide-optimizing: + +============ + Optimizing +============ + +Introduction +============ +The default configuration makes a lot of compromises. It's not optimal for +any single case, but works well enough for most situations. + +There are optimizations that can be applied based on specific use cases. + +Optimizations can apply to different properties of the running environment, +be it the time tasks take to execute, the amount of memory used, or +responsiveness at times of high load. + +Ensuring Operations +=================== + +In the book `Programming Pearls`_, Jon Bentley presents the concept of +back-of-the-envelope calculations by asking the question; + + ❝ How much water flows out of the Mississippi River in a day? ❞ + +The point of this exercise [*]_ is to show that there is a limit +to how much data a system can process in a timely manner. +Back of the envelope calculations can be used as a means to plan for this +ahead of time. + +In Celery; If a task takes 10 minutes to complete, +and there are 10 new tasks coming in every minute, the queue will never +be empty. This is why it's very important +that you monitor queue lengths! + +A way to do this is by :ref:`using Munin `. +You should set up alerts, that will notify you as soon as any queue has +reached an unacceptable size. This way you can take appropriate action +like adding new worker nodes, or revoking unnecessary tasks. + +.. [*] The chapter is available to read for free here: + `The back of the envelope`_. The book is a classic text. Highly + recommended. + +.. _`Programming Pearls`: http://www.cs.bell-labs.com/cm/cs/pearls/ + +.. _`The back of the envelope`: + http://books.google.com/books?id=kse_7qbWbjsC&pg=PA67 + +.. _optimizing-general-settings: + +General Settings +================ + +.. _optimizing-librabbitmq: + +librabbitmq +----------- + +If you're using RabbitMQ (AMQP) as the broker then you can install the +:mod:`librabbitmq` module to use an optimized client written in C: + +.. code-block:: bash + + $ pip install librabbitmq + +The 'amqp' transport will automatically use the librabbitmq module if it's +installed, or you can also specify the transport you want directly by using +the ``pyamqp://`` or ``librabbitmq://`` prefixes. + +.. _optimizing-connection-pools: + +Broker Connection Pools +----------------------- + +The broker connection pool is enabled by default since version 2.5. + +You can tweak the :setting:`BROKER_POOL_LIMIT` setting to minimize +contention, and the value should be based on the number of +active threads/greenthreads using broker connections. + +.. _optimizing-transient-queues: + +Using Transient Queues +---------------------- + +Queues created by Celery are persistent by default. This means that +the broker will write messages to disk to ensure that the tasks will +be executed even if the broker is restarted. + +But in some cases it's fine that the message is lost, so not all tasks +require durability. You can create a *transient* queue for these tasks +to improve performance: + +.. 
code-block:: python + + from kombu import Exchange, Queue + + CELERY_QUEUES = ( + Queue('celery', routing_key='celery'), + Queue('transient', routing_key='transient', + delivery_mode=1), + ) + + +The ``delivery_mode`` changes how the messages to this queue are delivered. +A value of 1 means that the message will not be written to disk, and a value +of 2 (default) means that the message can be written to disk. + +To direct a task to your new transient queue you can specify the queue +argument (or use the :setting:`CELERY_ROUTES` setting): + +.. code-block:: python + + task.apply_async(args, queue='transient') + +For more information see the :ref:`routing guide `. + +.. _optimizing-worker-settings: + +Worker Settings +=============== + +.. _optimizing-prefetch-limit: + +Prefetch Limits +--------------- + +*Prefetch* is a term inherited from AMQP that is often misunderstood +by users. + +The prefetch limit is a **limit** for the number of tasks (messages) a worker +can reserve for itself. If it is zero, the worker will keep +consuming messages, not respecting that there may be other +available worker nodes that may be able to process them sooner [*]_, +or that the messages may not even fit in memory. + +The workers' default prefetch count is the +:setting:`CELERYD_PREFETCH_MULTIPLIER` setting multiplied by the number +of concurrency slots[*]_ (processes/threads/greenthreads). + +If you have many tasks with a long duration you want +the multiplier value to be 1, which means it will only reserve one +task per worker process at a time. + +However -- If you have many short-running tasks, and throughput/round trip +latency is important to you, this number should be large. The worker is +able to process more tasks per second if the messages have already been +prefetched, and is available in memory. You may have to experiment to find +the best value that works for you. Values like 50 or 150 might make sense in +these circumstances. Say 64, or 128. + +If you have a combination of long- and short-running tasks, the best option +is to use two worker nodes that are configured separately, and route +the tasks according to the run-time. (see :ref:`guide-routing`). + +.. [*] RabbitMQ and other brokers deliver messages round-robin, + so this doesn't apply to an active system. If there is no prefetch + limit and you restart the cluster, there will be timing delays between + nodes starting. If there are 3 offline nodes and one active node, + all messages will be delivered to the active node. + +.. [*] This is the concurrency setting; :setting:`CELERYD_CONCURRENCY` or the + :option:`-c` option to the :program:`celery worker` program. + + +Reserve one task at a time +-------------------------- + +When using early acknowledgement (default), a prefetch multiplier of 1 +means the worker will reserve at most one extra task for every active +worker process. + +When users ask if it's possible to disable "prefetching of tasks", often +what they really want is to have a worker only reserve as many tasks as there +are child processes. + +But this is not possible without enabling late acknowledgements +acknowledgements; A task that has been started, will be +retried if the worker crashes mid execution so the task must be `idempotent`_ +(see also notes at :ref:`faq-acks_late-vs-retry`). + +.. _`idempotent`: http://en.wikipedia.org/wiki/Idempotent + +You can enable this behavior by using the following configuration options: + +.. code-block:: python + + CELERY_ACKS_LATE = True + CELERYD_PREFETCH_MULTIPLIER = 1 + +.. 
_prefork-pool-prefetch: + +Prefork pool prefetch settings +------------------------------ + +The prefork pool will asynchronously send as many tasks to the processes +as it can and this means that the processes are, in effect, prefetching +tasks. + +This benefits performance but it also means that tasks may be stuck +waiting for long running tasks to complete:: + + -> send T1 to Process A + # A executes T1 + -> send T2 to Process B + # B executes T2 + <- T2 complete + + -> send T3 to Process A + # A still executing T1, T3 stuck in local buffer and + # will not start until T1 returns + +The worker will send tasks to the process as long as the pipe buffer is +writable. The pipe buffer size varies based on the operating system: some may +have a buffer as small as 64kb but on recent Linux versions the buffer +size is 1MB (can only be changed system wide). + +You can disable this prefetching behavior by enabling the :option:`-Ofair` +worker option: + +.. code-block:: bash + + $ celery -A proj worker -l info -Ofair + +With this option enabled the worker will only write to processes that are +available for work, disabling the prefetch behavior. diff --git a/docs/userguide/periodic-tasks.rst b/docs/userguide/periodic-tasks.rst new file mode 100644 index 0000000..a81c45f --- /dev/null +++ b/docs/userguide/periodic-tasks.rst @@ -0,0 +1,293 @@ +.. _guide-beat: + +================ + Periodic Tasks +================ + +.. contents:: + :local: + +Introduction +============ + +:program:`celery beat` is a scheduler. It kicks off tasks at regular intervals, +which are then executed by the worker nodes available in the cluster. + +By default the entries are taken from the :setting:`CELERYBEAT_SCHEDULE` setting, +but custom stores can also be used, like storing the entries +in an SQL database. + +You have to ensure only a single scheduler is running for a schedule +at a time, otherwise you would end up with duplicate tasks. Using +a centralized approach means the schedule does not have to be synchronized, +and the service can operate without using locks. + +.. _beat-timezones: + +Time Zones +========== + +The periodic task schedules uses the UTC time zone by default, +but you can change the time zone used using the :setting:`CELERY_TIMEZONE` +setting. + +An example time zone could be `Europe/London`: + +.. code-block:: python + + CELERY_TIMEZONE = 'Europe/London' + + +This setting must be added to your app, either by configuration it directly +using (``app.conf.CELERY_TIMEZONE = 'Europe/London'``), or by adding +it to your configuration module if you have set one up using +``app.config_from_object``. See :ref:`celerytut-configuration` for +more information about configuration options. + + +The default scheduler (storing the schedule in the :file:`celerybeat-schedule` +file) will automatically detect that the time zone has changed, and so will +reset the schedule itself, but other schedulers may not be so smart (e.g. the +Django database scheduler, see below) and in that case you will have to reset the +schedule manually. + +.. admonition:: Django Users + + Celery recommends and is compatible with the new ``USE_TZ`` setting introduced + in Django 1.4. + + For Django users the time zone specified in the ``TIME_ZONE`` setting + will be used, or you can specify a custom time zone for Celery alone + by using the :setting:`CELERY_TIMEZONE` setting. + + The database scheduler will not reset when timezone related settings + change, so you must do this manually: + + .. 
code-block:: bash + + $ python manage.py shell + >>> from djcelery.models import PeriodicTask + >>> PeriodicTask.objects.update(last_run_at=None) + +.. _beat-entries: + +Entries +======= + +To schedule a task periodically you have to add an entry to the +:setting:`CELERYBEAT_SCHEDULE` setting. + +Example: Run the `tasks.add` task every 30 seconds. + +.. code-block:: python + + from datetime import timedelta + + CELERYBEAT_SCHEDULE = { + 'add-every-30-seconds': { + 'task': 'tasks.add', + 'schedule': timedelta(seconds=30), + 'args': (16, 16) + }, + } + + CELERY_TIMEZONE = 'UTC' + + +.. note:: + + If you are wondering where these settings should go then + please see :ref:`celerytut-configuration`. You can either + set these options on your app directly or you can keep + a separate module for configuration. + +Using a :class:`~datetime.timedelta` for the schedule means the task will +be sent in 30 second intervals (the first task will be sent 30 seconds +after `celery beat` starts, and then every 30 seconds +after the last run). + +A crontab like schedule also exists, see the section on `Crontab schedules`_. + +Like with ``cron``, the tasks may overlap if the first task does not complete +before the next. If that is a concern you should use a locking +strategy to ensure only one instance can run at a time (see for example +:ref:`cookbook-task-serial`). + +.. _beat-entry-fields: + +Available Fields +---------------- + +* `task` + + The name of the task to execute. + +* `schedule` + + The frequency of execution. + + This can be the number of seconds as an integer, a + :class:`~datetime.timedelta`, or a :class:`~celery.schedules.crontab`. + You can also define your own custom schedule types, by extending the + interface of :class:`~celery.schedules.schedule`. + +* `args` + + Positional arguments (:class:`list` or :class:`tuple`). + +* `kwargs` + + Keyword arguments (:class:`dict`). + +* `options` + + Execution options (:class:`dict`). + + This can be any argument supported by + :meth:`~celery.task.base.Task.apply_async`, + e.g. `exchange`, `routing_key`, `expires`, and so on. + +* `relative` + + By default :class:`~datetime.timedelta` schedules are scheduled + "by the clock". This means the frequency is rounded to the nearest + second, minute, hour or day depending on the period of the timedelta. + + If `relative` is true the frequency is not rounded and will be + relative to the time when :program:`celery beat` was started. + +.. _beat-crontab: + +Crontab schedules +================= + +If you want more control over when the task is executed, for +example, a particular time of day or day of the week, you can use +the :class:`~celery.schedules.crontab` schedule type: + +.. code-block:: python + + from celery.schedules import crontab + + CELERYBEAT_SCHEDULE = { + # Executes every Monday morning at 7:30 A.M + 'add-every-monday-morning': { + 'task': 'tasks.add', + 'schedule': crontab(hour=7, minute=30, day_of_week=1), + 'args': (16, 16), + }, + } + +The syntax of these crontab expressions are very flexible. Some examples: + ++-----------------------------------------+--------------------------------------------+ +| **Example** | **Meaning** | ++-----------------------------------------+--------------------------------------------+ +| ``crontab()`` | Execute every minute. | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(minute=0, hour=0)`` | Execute daily at midnight. 
| ++-----------------------------------------+--------------------------------------------+ +| ``crontab(minute=0, hour='*/3')`` | Execute every three hours: | +| | midnight, 3am, 6am, 9am, | +| | noon, 3pm, 6pm, 9pm. | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(minute=0,`` | Same as previous. | +| ``hour='0,3,6,9,12,15,18,21')`` | | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(minute='*/15')`` | Execute every 15 minutes. | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(day_of_week='sunday')`` | Execute every minute (!) at Sundays. | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(minute='*',`` | Same as previous. | +| ``hour='*',`` | | +| ``day_of_week='sun')`` | | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(minute='*/10',`` | Execute every ten minutes, but only | +| ``hour='3,17,22',`` | between 3-4 am, 5-6 pm and 10-11 pm on | +| ``day_of_week='thu,fri')`` | Thursdays or Fridays. | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(minute=0, hour='*/2,*/3')`` | Execute every even hour, and every hour | +| | divisible by three. This means: | +| | at every hour *except*: 1am, | +| | 5am, 7am, 11am, 1pm, 5pm, 7pm, | +| | 11pm | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(minute=0, hour='*/5')`` | Execute hour divisible by 5. This means | +| | that it is triggered at 3pm, not 5pm | +| | (since 3pm equals the 24-hour clock | +| | value of "15", which is divisible by 5). | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(minute=0, hour='*/3,8-17')`` | Execute every hour divisible by 3, and | +| | every hour during office hours (8am-5pm). | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(day_of_month='2')`` | Execute on the second day of every month. | +| | | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(day_of_month='2-30/3')`` | Execute on every even numbered day. | +| | | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(day_of_month='1-7,15-21')`` | Execute on the first and third weeks of | +| | the month. | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(day_of_month='11',`` | Execute on 11th of May every year. | +| ``month_of_year='5')`` | | ++-----------------------------------------+--------------------------------------------+ +| ``crontab(month_of_year='*/3')`` | Execute on the first month of every | +| | quarter. | ++-----------------------------------------+--------------------------------------------+ + +See :class:`celery.schedules.crontab` for more documentation. + +.. _beat-starting: + +Starting the Scheduler +====================== + +To start the :program:`celery beat` service: + +.. code-block:: bash + + $ celery -A proj beat + +You can also start embed `beat` inside the worker by enabling +workers `-B` option, this is convenient if you will never run +more than one worker node, but it's not commonly used and for that +reason is not recommended for production use: + +.. 
code-block:: bash + + $ celery -A proj worker -B + +Beat needs to store the last run times of the tasks in a local database +file (named `celerybeat-schedule` by default), so it needs access to +write in the current directory, or alternatively you can specify a custom +location for this file: + +.. code-block:: bash + + $ celery -A proj beat -s /home/celery/var/run/celerybeat-schedule + + +.. note:: + + To daemonize beat see :ref:`daemonizing`. + +.. _beat-custom-schedulers: + +Using custom scheduler classes +------------------------------ + +Custom scheduler classes can be specified on the command-line (the `-S` +argument). The default scheduler is :class:`celery.beat.PersistentScheduler`, +which is simply keeping track of the last run times in a local database file +(a :mod:`shelve`). + +`django-celery` also ships with a scheduler that stores the schedule in the +Django database: + +.. code-block:: bash + + $ celery -A proj beat -S djcelery.schedulers.DatabaseScheduler + +Using `django-celery`'s scheduler you can add, modify and remove periodic +tasks from the Django Admin. diff --git a/docs/userguide/remote-tasks.rst b/docs/userguide/remote-tasks.rst new file mode 100644 index 0000000..e5f4aa8 --- /dev/null +++ b/docs/userguide/remote-tasks.rst @@ -0,0 +1,125 @@ +.. _guide-webhooks: + +================================ + HTTP Callback Tasks (Webhooks) +================================ + +.. module:: celery.task.http + +.. contents:: + :local: + +.. _webhook-basics: + +Basics +====== + +If you need to call into another language, framework or similar, you can +do so by using HTTP callback tasks. + +The HTTP callback tasks uses GET/POST data to pass arguments and returns +result as a JSON response. The scheme to call a task is:: + + GET http://example.com/mytask/?arg1=a&arg2=b&arg3=c + +or using POST:: + + POST http://example.com/mytask + +.. note:: + + POST data needs to be form encoded. + +Whether to use GET or POST is up to you and your requirements. + +The web page should then return a response in the following format +if the execution was successful:: + + {'status': 'success', 'retval': …} + +or if there was an error:: + + {'status': 'failure', 'reason': 'Invalid moon alignment.'} + +Enabling the HTTP task +---------------------- + +To enable the HTTP dispatch task you have to add :mod:`celery.task.http` +to :setting:`CELERY_IMPORTS`, or start the worker with ``-I +celery.task.http``. + + +.. _webhook-django-example: + +Django webhook example +====================== + +With this information you could define a simple task in Django: + +.. code-block:: python + + from django.http import HttpResponse + from anyjson import serialize + + + def multiply(request): + x = int(request.GET['x']) + y = int(request.GET['y']) + result = x * y + response = {'status': 'success', 'retval': result} + return HttpResponse(serialize(response), mimetype='application/json') + +.. _webhook-rails-example: + +Ruby on Rails webhook example +============================= + +or in Ruby on Rails: + +.. code-block:: ruby + + def multiply + @x = params[:x].to_i + @y = params[:y].to_i + + @status = {:status => 'success', :retval => @x * @y} + + render :json => @status + end + +You can easily port this scheme to any language/framework; +new examples and libraries are very welcome. + +.. 
_webhook-calling: + +Calling webhook tasks +===================== + +To call a task you can use the :class:`~celery.task.http.URL` class: + + >>> from celery.task.http import URL + >>> res = URL('http://example.com/multiply').get_async(x=10, y=10) + + +:class:`~celery.task.http.URL` is a shortcut to the :class:`HttpDispatchTask`. +You can subclass this to extend the +functionality. + + >>> from celery.task.http import HttpDispatchTask + >>> res = HttpDispatchTask.delay( + ... url='http://example.com/multiply', + ... method='GET', x=10, y=10) + >>> res.get() + 100 + +The output of :program:`celery worker` (or the log file if enabled) should show the +task being executed:: + + [INFO/MainProcess] Task celery.task.http.HttpDispatchTask + [f2cc8efc-2a14-40cd-85ad-f1c77c94beeb] processed: 100 + +Since calling tasks can be done via HTTP using the +:func:`djcelery.views.apply` view, calling tasks from other languages is easy. +For an example service exposing tasks via HTTP you should have a look at +`examples/celery_http_gateway` in the Celery distribution: +http://github.com/celery/celery/tree/master/examples/celery_http_gateway/ diff --git a/docs/userguide/routing.rst b/docs/userguide/routing.rst new file mode 100644 index 0000000..9ea1e50 --- /dev/null +++ b/docs/userguide/routing.rst @@ -0,0 +1,584 @@ +.. _guide-routing: + +=============== + Routing Tasks +=============== + +.. note:: + + Alternate routing concepts like topic and fanout may not be + available for all transports, please consult the + :ref:`transport comparison table `. + +.. contents:: + :local: + + +.. _routing-basics: + +Basics +====== + +.. _routing-automatic: + +Automatic routing +----------------- + +The simplest way to do routing is to use the +:setting:`CELERY_CREATE_MISSING_QUEUES` setting (on by default). + +With this setting on, a named queue that is not already defined in +:setting:`CELERY_QUEUES` will be created automatically. This makes it easy to +perform simple routing tasks. + +Say you have two servers, `x`, and `y` that handles regular tasks, +and one server `z`, that only handles feed related tasks. You can use this +configuration:: + + CELERY_ROUTES = {'feed.tasks.import_feed': {'queue': 'feeds'}} + +With this route enabled import feed tasks will be routed to the +`"feeds"` queue, while all other tasks will be routed to the default queue +(named `"celery"` for historical reasons). + +Now you can start server `z` to only process the feeds queue like this: + +.. code-block:: bash + + user@z:/$ celery -A proj worker -Q feeds + +You can specify as many queues as you want, so you can make this server +process the default queue as well: + +.. code-block:: bash + + user@z:/$ celery -A proj worker -Q feeds,celery + +.. _routing-changing-default-queue: + +Changing the name of the default queue +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can change the name of the default queue by using the following +configuration: + +.. code-block:: python + + from kombu import Exchange, Queue + + CELERY_DEFAULT_QUEUE = 'default' + CELERY_QUEUES = ( + Queue('default', Exchange('default'), routing_key='default'), + ) + +.. _routing-autoqueue-details: + +How the queues are defined +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The point with this feature is to hide the complex AMQP protocol for users +with only basic needs. However -- you may still be interested in how these queues +are declared. + +A queue named `"video"` will be created with the following settings: + +.. 
code-block:: python + + {'exchange': 'video', + 'exchange_type': 'direct', + 'routing_key': 'video'} + +The non-AMQP backends like `ghettoq` does not support exchanges, so they +require the exchange to have the same name as the queue. Using this design +ensures it will work for them as well. + +.. _routing-manual: + +Manual routing +-------------- + +Say you have two servers, `x`, and `y` that handles regular tasks, +and one server `z`, that only handles feed related tasks, you can use this +configuration: + +.. code-block:: python + + from kombu import Queue + + CELERY_DEFAULT_QUEUE = 'default' + CELERY_QUEUES = ( + Queue('default', routing_key='task.#'), + Queue('feed_tasks', routing_key='feed.#'), + ) + CELERY_DEFAULT_EXCHANGE = 'tasks' + CELERY_DEFAULT_EXCHANGE_TYPE = 'topic' + CELERY_DEFAULT_ROUTING_KEY = 'task.default' + +:setting:`CELERY_QUEUES` is a list of :class:`~kombu.entitity.Queue` +instances. +If you don't set the exchange or exchange type values for a key, these +will be taken from the :setting:`CELERY_DEFAULT_EXCHANGE` and +:setting:`CELERY_DEFAULT_EXCHANGE_TYPE` settings. + +To route a task to the `feed_tasks` queue, you can add an entry in the +:setting:`CELERY_ROUTES` setting: + +.. code-block:: python + + CELERY_ROUTES = { + 'feeds.tasks.import_feed': { + 'queue': 'feed_tasks', + 'routing_key': 'feed.import', + }, + } + + +You can also override this using the `routing_key` argument to +:meth:`Task.apply_async`, or :func:`~celery.execute.send_task`: + + >>> from feeds.tasks import import_feed + >>> import_feed.apply_async(args=['http://cnn.com/rss'], + ... queue='feed_tasks', + ... routing_key='feed.import') + + +To make server `z` consume from the feed queue exclusively you can +start it with the ``-Q`` option: + +.. code-block:: bash + + user@z:/$ celery -A proj worker -Q feed_tasks --hostname=z@%h + +Servers `x` and `y` must be configured to consume from the default queue: + +.. code-block:: bash + + user@x:/$ celery -A proj worker -Q default --hostname=x@%h + user@y:/$ celery -A proj worker -Q default --hostname=y@%h + +If you want, you can even have your feed processing worker handle regular +tasks as well, maybe in times when there's a lot of work to do: + +.. code-block:: python + + user@z:/$ celery -A proj worker -Q feed_tasks,default --hostname=z@%h + +If you have another queue but on another exchange you want to add, +just specify a custom exchange and exchange type: + +.. code-block:: python + + from kombu import Exchange, Queue + + CELERY_QUEUES = ( + Queue('feed_tasks', routing_key='feed.#'), + Queue('regular_tasks', routing_key='task.#'), + Queue('image_tasks', exchange=Exchange('mediatasks', type='direct'), + routing_key='image.compress'), + ) + +If you're confused about these terms, you should read up on AMQP. + +.. seealso:: + + In addition to the :ref:`amqp-primer` below, there's + `Rabbits and Warrens`_, an excellent blog post describing queues and + exchanges. There's also AMQP in 10 minutes*: `Flexible Routing Model`_, + and `Standard Exchange Types`_. For users of RabbitMQ the `RabbitMQ FAQ`_ + could be useful as a source of information. + +.. _`Rabbits and Warrens`: http://blogs.digitar.com/jjww/2009/01/rabbits-and-warrens/ +.. _`Flexible Routing Model`: http://bit.ly/95XFO1 +.. _`Standard Exchange Types`: http://bit.ly/EEWca +.. _`RabbitMQ FAQ`: http://www.rabbitmq.com/faq.html + +.. _amqp-primer: + +AMQP Primer +=========== + +Messages +-------- + +A message consists of headers and a body. 
Celery uses headers to store +the content type of the message and its content encoding. The +content type is usually the serialization format used to serialize the +message. The body contains the name of the task to execute, the +task id (UUID), the arguments to apply it with and some additional +metadata -- like the number of retries or an ETA. + +This is an example task message represented as a Python dictionary: + +.. code-block:: python + + {'task': 'myapp.tasks.add', + 'id': '54086c5e-6193-4575-8308-dbab76798756', + 'args': [4, 4], + 'kwargs': {}} + +.. _amqp-producers-consumers-brokers: + +Producers, consumers and brokers +-------------------------------- + +The client sending messages is typically called a *publisher*, or +a *producer*, while the entity receiving messages is called +a *consumer*. + +The *broker* is the message server, routing messages from producers +to consumers. + +You are likely to see these terms used a lot in AMQP related material. + +.. _amqp-exchanges-queues-keys: + +Exchanges, queues and routing keys. +----------------------------------- + +1. Messages are sent to exchanges. +2. An exchange routes messages to one or more queues. Several exchange types + exists, providing different ways to do routing, or implementing + different messaging scenarios. +3. The message waits in the queue until someone consumes it. +4. The message is deleted from the queue when it has been acknowledged. + +The steps required to send and receive messages are: + +1. Create an exchange +2. Create a queue +3. Bind the queue to the exchange. + +Celery automatically creates the entities necessary for the queues in +:setting:`CELERY_QUEUES` to work (except if the queue's `auto_declare` +setting is set to :const:`False`). + +Here's an example queue configuration with three queues; +One for video, one for images and one default queue for everything else: + +.. code-block:: python + + from kombu import Exchange, Queue + + CELERY_QUEUES = ( + Queue('default', Exchange('default'), routing_key='default'), + Queue('videos', Exchange('media'), routing_key='media.video'), + Queue('images', Exchange('media'), routing_key='media.image'), + ) + CELERY_DEFAULT_QUEUE = 'default' + CELERY_DEFAULT_EXCHANGE_TYPE = 'direct' + CELERY_DEFAULT_ROUTING_KEY = 'default' + +.. _amqp-exchange-types: + +Exchange types +-------------- + +The exchange type defines how the messages are routed through the exchange. +The exchange types defined in the standard are `direct`, `topic`, +`fanout` and `headers`. Also non-standard exchange types are available +as plug-ins to RabbitMQ, like the `last-value-cache plug-in`_ by Michael +Bridgen. + +.. _`last-value-cache plug-in`: + http://github.com/squaremo/rabbitmq-lvc-plugin + +.. _amqp-exchange-type-direct: + +Direct exchanges +~~~~~~~~~~~~~~~~ + +Direct exchanges match by exact routing keys, so a queue bound by +the routing key `video` only receives messages with that routing key. + +.. _amqp-exchange-type-topic: + +Topic exchanges +~~~~~~~~~~~~~~~ + +Topic exchanges matches routing keys using dot-separated words, and the +wildcard characters: ``*`` (matches a single word), and ``#`` (matches +zero or more words). + +With routing keys like ``usa.news``, ``usa.weather``, ``norway.news`` and +``norway.weather``, bindings could be ``*.news`` (all news), ``usa.#`` (all +items in the USA) or ``usa.weather`` (all USA weather items). + +.. _amqp-api: + +Related API commands +-------------------- + +.. 
method:: exchange.declare(exchange_name, type, passive, + durable, auto_delete, internal) + + Declares an exchange by name. + + :keyword passive: Passive means the exchange won't be created, but you + can use this to check if the exchange already exists. + + :keyword durable: Durable exchanges are persistent. That is - they survive + a broker restart. + + :keyword auto_delete: This means the queue will be deleted by the broker + when there are no more queues using it. + + +.. method:: queue.declare(queue_name, passive, durable, exclusive, auto_delete) + + Declares a queue by name. + + Exclusive queues can only be consumed from by the current connection. + Exclusive also implies `auto_delete`. + +.. method:: queue.bind(queue_name, exchange_name, routing_key) + + Binds a queue to an exchange with a routing key. + Unbound queues will not receive messages, so this is necessary. + +.. method:: queue.delete(name, if_unused=False, if_empty=False) + + Deletes a queue and its binding. + +.. method:: exchange.delete(name, if_unused=False) + + Deletes an exchange. + +.. note:: + + Declaring does not necessarily mean "create". When you declare you + *assert* that the entity exists and that it's operable. There is no + rule as to whom should initially create the exchange/queue/binding, + whether consumer or producer. Usually the first one to need it will + be the one to create it. + +.. _amqp-api-hands-on: + +Hands-on with the API +--------------------- + +Celery comes with a tool called :program:`celery amqp` +that is used for command line access to the AMQP API, enabling access to +administration tasks like creating/deleting queues and exchanges, purging +queues or sending messages. It can also be used for non-AMQP brokers, +but different implementation may not implement all commands. + +You can write commands directly in the arguments to :program:`celery amqp`, +or just start with no arguments to start it in shell-mode: + +.. code-block:: bash + + $ celery -A proj amqp + -> connecting to amqp://guest@localhost:5672/. + -> connected. + 1> + +Here ``1>`` is the prompt. The number 1, is the number of commands you +have executed so far. Type ``help`` for a list of commands available. +It also supports auto-completion, so you can start typing a command and then +hit the `tab` key to show a list of possible matches. + +Let's create a queue you can send messages to: + +.. code-block:: bash + + $ celery -A proj amqp + 1> exchange.declare testexchange direct + ok. + 2> queue.declare testqueue + ok. queue:testqueue messages:0 consumers:0. + 3> queue.bind testqueue testexchange testkey + ok. + +This created the direct exchange ``testexchange``, and a queue +named ``testqueue``. The queue is bound to the exchange using +the routing key ``testkey``. + +From now on all messages sent to the exchange ``testexchange`` with routing +key ``testkey`` will be moved to this queue. You can send a message by +using the ``basic.publish`` command:: + + 4> basic.publish 'This is a message!' testexchange testkey + ok. + +Now that the message is sent you can retrieve it again. 
You can use the +``basic.get``` command here, which polls for new messages on the queue +(which is alright for maintainence tasks, for services you'd want to use +``basic.consume`` instead) + +Pop a message off the queue:: + + 5> basic.get testqueue + {'body': 'This is a message!', + 'delivery_info': {'delivery_tag': 1, + 'exchange': u'testexchange', + 'message_count': 0, + 'redelivered': False, + 'routing_key': u'testkey'}, + 'properties': {}} + + +AMQP uses acknowledgment to signify that a message has been received +and processed successfully. If the message has not been acknowledged +and consumer channel is closed, the message will be delivered to +another consumer. + +Note the delivery tag listed in the structure above; Within a connection +channel, every received message has a unique delivery tag, +This tag is used to acknowledge the message. Also note that +delivery tags are not unique across connections, so in another client +the delivery tag `1` might point to a different message than in this channel. + +You can acknowledge the message you received using ``basic.ack``:: + + 6> basic.ack 1 + ok. + +To clean up after our test session you should delete the entities you created:: + + 7> queue.delete testqueue + ok. 0 messages deleted. + 8> exchange.delete testexchange + ok. + + +.. _routing-tasks: + +Routing Tasks +============= + +.. _routing-defining-queues: + +Defining queues +--------------- + +In Celery available queues are defined by the :setting:`CELERY_QUEUES` setting. + +Here's an example queue configuration with three queues; +One for video, one for images and one default queue for everything else: + +.. code-block:: python + + default_exchange = Exchange('default', type='direct') + media_exchange = Exchange('media', type='direct') + + CELERY_QUEUES = ( + Queue('default', default_exchange, routing_key='default'), + Queue('videos', media_exchange, routing_key='media.video'), + Queue('images', media_exchange, routing_key='media.image') + ) + CELERY_DEFAULT_QUEUE = 'default' + CELERY_DEFAULT_EXCHANGE = 'default' + CELERY_DEFAULT_ROUTING_KEY = 'default' + +Here, the :setting:`CELERY_DEFAULT_QUEUE` will be used to route tasks that +doesn't have an explicit route. + +The default exchange, exchange type and routing key will be used as the +default routing values for tasks, and as the default values for entries +in :setting:`CELERY_QUEUES`. + +.. _routing-task-destination: + +Specifying task destination +--------------------------- + +The destination for a task is decided by the following (in order): + +1. The :ref:`routers` defined in :setting:`CELERY_ROUTES`. +2. The routing arguments to :func:`Task.apply_async`. +3. Routing related attributes defined on the :class:`~celery.task.base.Task` + itself. + +It is considered best practice to not hard-code these settings, but rather +leave that as configuration options by using :ref:`routers`; +This is the most flexible approach, but sensible defaults can still be set +as task attributes. + +.. _routers: + +Routers +------- + +A router is a class that decides the routing options for a task. + +All you need to define a new router is to create a class with a +``route_for_task`` method: + +.. 
code-block:: python + + class MyRouter(object): + + def route_for_task(self, task, args=None, kwargs=None): + if task == 'myapp.tasks.compress_video': + return {'exchange': 'video', + 'exchange_type': 'topic', + 'routing_key': 'video.compress'} + return None + +If you return the ``queue`` key, it will expand with the defined settings of +that queue in :setting:`CELERY_QUEUES`: + +.. code-block:: javascript + + {'queue': 'video', 'routing_key': 'video.compress'} + +becomes --> + +.. code-block:: javascript + + {'queue': 'video', + 'exchange': 'video', + 'exchange_type': 'topic', + 'routing_key': 'video.compress'} + + +You install router classes by adding them to the :setting:`CELERY_ROUTES` +setting:: + + CELERY_ROUTES = (MyRouter(), ) + +Router classes can also be added by name:: + + CELERY_ROUTES = ('myapp.routers.MyRouter', ) + + +For simple task name -> route mappings like the router example above, +you can simply drop a dict into :setting:`CELERY_ROUTES` to get the +same behavior: + +.. code-block:: python + + CELERY_ROUTES = ({'myapp.tasks.compress_video': { + 'queue': 'video', + 'routing_key': 'video.compress' + }}, ) + +The routers will then be traversed in order, it will stop at the first router +returning a true value, and use that as the final route for the task. + +Broadcast +--------- + +Celery can also support broadcast routing. +Here is an example exchange ``broadcast_tasks`` that delivers +copies of tasks to all workers connected to it: + +.. code-block:: python + + from kombu.common import Broadcast + + CELERY_QUEUES = (Broadcast('broadcast_tasks'), ) + + CELERY_ROUTES = {'tasks.reload_cache': {'queue': 'broadcast_tasks'}} + +Now the ``tasks.reload_tasks`` task will be sent to every +worker consuming from this queue. + +.. admonition:: Broadcast & Results + + Note that Celery result does not define what happens if two + tasks have the same task_id. If the same task is distributed to more + than one worker, then the state history may not be preserved. + + It is a good idea to set the ``task.ignore_result`` attribute in + this case. diff --git a/docs/userguide/security.rst b/docs/userguide/security.rst new file mode 100644 index 0000000..ef3cd96 --- /dev/null +++ b/docs/userguide/security.rst @@ -0,0 +1,240 @@ +.. _guide-security: + +========== + Security +========== + +.. contents:: + :local: + +Introduction +============ + +While Celery is written with security in mind, it should be treated as an +unsafe component. + +Depending on your `Security Policy`_, there are +various steps you can take to make your Celery installation more secure. + + +.. _`Security Policy`: http://en.wikipedia.org/wiki/Security_policy + + +Areas of Concern +================ + +Broker +------ + +It is imperative that the broker is guarded from unwanted access, especially +if accessible to the public. +By default, workers trust that the data they get from the broker has not +been tampered with. See `Message Signing`_ for information on how to make +the broker connection more trustworthy. + +The first line of defence should be to put a firewall in front of the broker, +allowing only white-listed machines to access it. + +Keep in mind that both firewall misconfiguration, and temporarily disabling +the firewall, is common in the real world. Solid security policy includes +monitoring of firewall equipment to detect if they have been disabled, be it +accidentally or on purpose. + +In other words, one should not blindly trust the firewall either. 
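+
+As a rough illustration of the white-listing approach described above,
+here is a minimal sketch using ``iptables``; the addresses are placeholders
+and the exact commands depend on the firewall tooling you actually use:
+
+.. code-block:: bash
+
+    # Allow the two white-listed application servers to reach the broker
+    $ iptables -A INPUT -p tcp --dport 5672 -s 10.0.0.10 -j ACCEPT
+    $ iptables -A INPUT -p tcp --dport 5672 -s 10.0.0.11 -j ACCEPT
+    # Drop everything else arriving on RabbitMQ's default port
+    $ iptables -A INPUT -p tcp --dport 5672 -j DROP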
+ +If your broker supports fine-grained access control, like RabbitMQ, +this is something you should look at enabling. See for example +http://www.rabbitmq.com/access-control.html. + +Client +------ + +In Celery, "client" refers to anything that sends messages to the +broker, e.g. web-servers that apply tasks. + +Having the broker properly secured doesn't matter if arbitrary messages +can be sent through a client. + +*[Need more text here]* + +Worker +------ + +The default permissions of tasks running inside a worker are the same ones as +the privileges of the worker itself. This applies to resources such as +memory, file-systems and devices. + +An exception to this rule is when using the multiprocessing based task pool, +which is currently the default. In this case, the task will have access to +any memory copied as a result of the :func:`fork` call (does not apply +under MS Windows), and access to memory contents written +by parent tasks in the same worker child process. + +Limiting access to memory contents can be done by launching every task +in a subprocess (:func:`fork` + :func:`execve`). + +Limiting file-system and device access can be accomplished by using +`chroot`_, `jail`_, `sandboxing`_, virtual machines or other +mechanisms as enabled by the platform or additional software. + +Note also that any task executed in the worker will have the +same network access as the machine on which it's running. If the worker +is located on an internal network it's recommended to add firewall rules for +outbound traffic. + +.. _`chroot`: http://en.wikipedia.org/wiki/Chroot +.. _`jail`: http://en.wikipedia.org/wiki/FreeBSD_jail +.. _`sandboxing`: + http://en.wikipedia.org/wiki/Sandbox_(computer_security) + +Serializers +=========== + +The default `pickle` serializer is convenient because it supports +arbitrary Python objects, whereas other serializers only +work with a restricted set of types. + +But for the same reasons the `pickle` serializer is inherently insecure [*]_, +and should be avoided whenever clients are untrusted or +unauthenticated. + +.. [*] http://nadiana.com/python-pickle-insecure + +You can disable untrusted content by specifying +a white-list of accepted content-types in the :setting:`CELERY_ACCEPT_CONTENT` +setting: + +.. versionadded:: 3.0.18 + +.. note:: + + This setting was first supported in version 3.0.18. If you're + running an earlier version it will simply be ignored, so make + sure you're running a version that supports it. + +.. code-block:: python + + CELERY_ACCEPT_CONTENT = ['json'] + + +This accepts a list of serializer names and content-types, so you could +also specify the content type for json: + +.. code-block:: python + + CELERY_ACCEPT_CONTENT = ['application/json'] + +Celery also comes with a special `auth` serializer that validates +communication between Celery clients and workers, making sure +that messages originates from trusted sources. +Using `Public-key cryptography` the `auth` serializer can verify the +authenticity of senders, to enable this read :ref:`message-signing` +for more information. + +.. _`pickle`: http://docs.python.org/library/pickle.html +.. _`Public-key cryptography`: + http://en.wikipedia.org/wiki/Public-key_cryptography + +.. _message-signing: + +Message Signing +=============== + +Celery can use the `pyOpenSSL`_ library to sign message using +`Public-key cryptography`, where +messages sent by clients are signed using a private key +and then later verified by the worker using a public certificate. 
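+
+While production deployments should use certificates issued by a real
+Certificate Authority (see below), a throwaway self-signed key and
+certificate pair is usually enough for development. This is only a sketch;
+the paths and subject name are placeholders, chosen to match the
+``CELERY_SECURITY_*`` settings shown further down:
+
+.. code-block:: bash
+
+    $ openssl req -new -newkey rsa:2048 -nodes -x509 -days 365 \
+          -subj '/CN=worker.example.com' \
+          -keyout /etc/ssl/private/worker.key \
+          -out /etc/ssl/certs/worker.pem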
+ +Optimally certificates should be signed by an official +`Certificate Authority`_, but they can also be self-signed. + +To enable this you should configure the :setting:`CELERY_TASK_SERIALIZER` +setting to use the `auth` serializer. +Also required is configuring the +paths used to locate private keys and certificates on the file-system: +the :setting:`CELERY_SECURITY_KEY`, +:setting:`CELERY_SECURITY_CERTIFICATE` and :setting:`CELERY_SECURITY_CERT_STORE` +settings respectively. +With these configured it is also necessary to call the +:func:`celery.setup_security` function. Note that this will also +disable all insecure serializers so that the worker won't accept +messages with untrusted content types. + +This is an example configuration using the `auth` serializer, +with the private key and certificate files located in `/etc/ssl`. + +.. code-block:: python + + CELERY_SECURITY_KEY = '/etc/ssl/private/worker.key' + CELERY_SECURITY_CERTIFICATE = '/etc/ssl/certs/worker.pem' + CELERY_SECURITY_CERT_STORE = '/etc/ssl/certs/*.pem' + from celery.security import setup_security + setup_security() + +.. note:: + + While relative paths are not disallowed, using absolute paths + is recommended for these files. + + Also note that the `auth` serializer won't encrypt the contents of + a message, so if needed this will have to be enabled separately. + +.. _`pyOpenSSL`: http://pypi.python.org/pypi/pyOpenSSL +.. _`X.509`: http://en.wikipedia.org/wiki/X.509 +.. _`Certificate Authority`: + http://en.wikipedia.org/wiki/Certificate_authority + +Intrusion Detection +=================== + +The most important part when defending your systems against +intruders is being able to detect if the system has been compromised. + +Logs +---- + +Logs are usually the first place to look for evidence +of security breaches, but they are useless if they can be tampered with. + +A good solution is to set up centralized logging with a dedicated logging +server. Acess to it should be restricted. +In addition to having all of the logs in a single place, if configured +correctly, it can make it harder for intruders to tamper with your logs. + +This should be fairly easy to setup using syslog (see also `syslog-ng`_ and +`rsyslog`_.). Celery uses the :mod:`logging` library, and already has +support for using syslog. + +A tip for the paranoid is to send logs using UDP and cut the +transmit part of the logging server's network cable :-) + +.. _`syslog-ng`: http://en.wikipedia.org/wiki/Syslog-ng +.. _`rsyslog`: http://www.rsyslog.com/ + +Tripwire +-------- + +`Tripwire`_ is a (now commercial) data integrity tool, with several +open source implementations, used to keep +cryptographic hashes of files in the file-system, so that administrators +can be alerted when they change. This way when the damage is done and your +system has been compromised you can tell exactly what files intruders +have changed (password files, logs, backdoors, rootkits and so on). +Often this is the only way you will be able to detect an intrusion. + +Some open source implementations include: + +* `OSSEC`_ +* `Samhain`_ +* `Open Source Tripwire`_ +* `AIDE`_ + +Also, the `ZFS`_ file-system comes with built-in integrity checks +that can be used. + +.. _`Tripwire`: http://tripwire.com/ +.. _`OSSEC`: http://www.ossec.net/ +.. _`Samhain`: http://la-samhna.de/samhain/index.html +.. _`AIDE`: http://aide.sourceforge.net/ +.. _`Open Source Tripwire`: http://sourceforge.net/projects/tripwire/ +.. 
_`ZFS`: http://en.wikipedia.org/wiki/ZFS diff --git a/docs/userguide/signals.rst b/docs/userguide/signals.rst new file mode 100644 index 0000000..4d6d72e --- /dev/null +++ b/docs/userguide/signals.rst @@ -0,0 +1,654 @@ +.. _signals: + +======= +Signals +======= + +.. contents:: + :local: + +Signals allows decoupled applications to receive notifications when +certain actions occur elsewhere in the application. + +Celery ships with many signals that you application can hook into +to augment behavior of certain actions. + +.. _signal-basics: + +Basics +====== + +Several kinds of events trigger signals, you can connect to these signals +to perform actions as they trigger. + +Example connecting to the :signal:`after_task_publish` signal: + +.. code-block:: python + + from celery.signals import after_task_publish + + @after_task_publish.connect + def task_sent_handler(sender=None, body=None, **kwargs): + print('after_task_publish for task id {body[id]}'.format( + body=body, + )) + + +Some signals also have a sender which you can filter by. For example the +:signal:`after_task_publish` signal uses the task name as a sender, so by +providing the ``sender`` argument to +:class:`~celery.utils.dispatch.signal.Signal.connect` you can +connect your handler to be called every time a task with name `"proj.tasks.add"` +is published: + +.. code-block:: python + + @after_task_publish.connect(sender='proj.tasks.add') + def task_sent_handler(sender=None, body=None, **kwargs): + print('after_task_publish for task id {body[id]}'.format( + body=body, + )) + +Signals use the same implementation as django.core.dispatch. As a result other +keyword parameters (e.g. signal) are passed to all signal handlers by default. + +The best practice for signal handlers is to accept arbitrary keyword +arguments (i.e. ``**kwargs``). That way new celery versions can add additional +arguments without breaking user code. + +.. _signal-ref: + +Signals +======= + +Task Signals +------------ + +.. signal:: before_task_publish + +before_task_publish +~~~~~~~~~~~~~~~~~~~ +.. versionadded:: 3.1 + +Dispatched before a task is published. +Note that this is executed in the process sending the task. + +Sender is the name of the task being sent. + +Provides arguements: + +* body + + Task message body. + + This is a mapping containing the task message fields + (see :ref:`task-message-protocol-v1`). + +* exchange + + Name of the exchange to send to or a :class:`~kombu.Exchange` object. + +* routing_key + + Routing key to use when sending the message. + +* headers + + Application headers mapping (can be modified). + +* properties + + Message properties (can be modified) + +* declare + + List of entities (:class:`~kombu.Exchange`, + :class:`~kombu.Queue` or :class:~`kombu.binding` to declare before + publishing the message. Can be modified. + +* retry_policy + + Mapping of retry options. Can be any argument to + :meth:`kombu.Connection.ensure` and can be modified. + +.. signal:: after_task_publish + +after_task_publish +~~~~~~~~~~~~~~~~~~ + +Dispatched when a task has been sent to the broker. +Note that this is executed in the process that sent the task. + +Sender is the name of the task being sent. + +Provides arguments: + +* body + + The task message body, see :ref:`task-message-protocol-v1` + for a reference of possible fields that can be defined. + +* exchange + + Name of the exchange or :class:`~kombu.Exchange` object used. + +* routing_key + + Routing key used. + +.. 
signal:: task_prerun + +task_prerun +~~~~~~~~~~~ + +Dispatched before a task is executed. + +Sender is the task object being executed. + +Provides arguments: + +* task_id + Id of the task to be executed. + +* task + The task being executed. + +* args + the tasks positional arguments. + +* kwargs + The tasks keyword arguments. + +.. signal:: task_postrun + +task_postrun +~~~~~~~~~~~~ + +Dispatched after a task has been executed. + +Sender is the task object executed. + +Provides arguments: + +* task_id + Id of the task to be executed. + +* task + The task being executed. + +* args + The tasks positional arguments. + +* kwargs + The tasks keyword arguments. + +* retval + The return value of the task. + +* state + + Name of the resulting state. + +.. signal:: task_retry + +task_retry +~~~~~~~~~~ + +Dispatched when a task will be retried. + +Sender is the task object. + +Provides arguments: + +* request + + The current task request. + +* reason + + Reason for retry (usually an exception instance, but can always be + coerced to :class:`str`). + +* einfo + + Detailed exception information, including traceback + (a :class:`billiard.einfo.ExceptionInfo` object). + + +.. signal:: task_success + +task_success +~~~~~~~~~~~~ + +Dispatched when a task succeeds. + +Sender is the task object executed. + +Provides arguments + +* result + Return value of the task. + +.. signal:: task_failure + +task_failure +~~~~~~~~~~~~ + +Dispatched when a task fails. + +Sender is the task object executed. + +Provides arguments: + +* task_id + Id of the task. + +* exception + Exception instance raised. + +* args + Positional arguments the task was called with. + +* kwargs + Keyword arguments the task was called with. + +* traceback + Stack trace object. + +* einfo + The :class:`celery.datastructures.ExceptionInfo` instance. + +.. signal:: task_revoked + +task_revoked +~~~~~~~~~~~~ + +Dispatched when a task is revoked/terminated by the worker. + +Sender is the task object revoked/terminated. + +Provides arguments: + +* request + + This is a :class:`~celery.worker.job.Request` instance, and not + ``task.request``. When using the prefork pool this signal + is dispatched in the parent process, so ``task.request`` is not available + and should not be used. Use this object instead, which should have many + of the same fields. + +* terminated + Set to :const:`True` if the task was terminated. + +* signum + Signal number used to terminate the task. If this is :const:`None` and + terminated is :const:`True` then :sig:`TERM` should be assumed. + +* expired + Set to :const:`True` if the task expired. + +App Signals +----------- + +.. signal:: import_modules + +import_modules +~~~~~~~~~~~~~~ + +This signal is sent when a program (worker, beat, shell) etc, asks +for modules in the :setting:`CELERY_INCLUDE` and :setting:`CELERY_IMPORTS` +settings to be imported. + +Sender is the app instance. + +Worker Signals +-------------- + +.. signal:: celeryd_after_setup + +celeryd_after_setup +~~~~~~~~~~~~~~~~~~~ + +This signal is sent after the worker instance is set up, +but before it calls run. This means that any queues from the :option:`-Q` +option is enabled, logging has been set up and so on. + +It can be used to e.g. add custom queues that should always be consumed +from, disregarding the :option:`-Q` option. Here's an example +that sets up a direct queue for each worker, these queues can then be +used to route a task to any specific worker: + +.. 
code-block:: python + + from celery.signals import celeryd_after_setup + + @celeryd_after_setup.connect + def setup_direct_queue(sender, instance, **kwargs): + queue_name = '{0}.dq'.format(sender) # sender is the nodename of the worker + instance.app.amqp.queues.select_add(queue_name) + +Provides arguments: + +* sender + Hostname of the worker. + +* instance + This is the :class:`celery.apps.worker.Worker` instance to be initialized. + Note that only the :attr:`app` and :attr:`hostname` (nodename) attributes have been + set so far, and the rest of ``__init__`` has not been executed. + +* conf + The configuration of the current app. + + +.. signal:: celeryd_init + +celeryd_init +~~~~~~~~~~~~ + +This is the first signal sent when :program:`celery worker` starts up. +The ``sender`` is the host name of the worker, so this signal can be used +to setup worker specific configuration: + +.. code-block:: python + + from celery.signals import celeryd_init + + @celeryd_init.connect(sender='worker12@example.com') + def configure_worker12(conf=None, **kwargs): + conf.CELERY_DEFAULT_RATE_LIMIT = '10/m' + +or to set up configuration for multiple workers you can omit specifying a +sender when you connect: + +.. code-block:: python + + from celery.signals import celeryd_init + + @celeryd_init.connect + def configure_workers(sender=None, conf=None, **kwargs): + if sender in ('worker1@example.com', 'worker2@example.com'): + conf.CELERY_DEFAULT_RATE_LIMIT = '10/m' + if sender == 'worker3@example.com': + conf.CELERYD_PREFETCH_MULTIPLIER = 0 + +Provides arguments: + +* sender + Nodename of the worker. + +* instance + This is the :class:`celery.apps.worker.Worker` instance to be initialized. + Note that only the :attr:`app` and :attr:`hostname` (nodename) attributes have been + set so far, and the rest of ``__init__`` has not been executed. + +* conf + The configuration of the current app. + +* options + + Options passed to the worker from command-line arguments (including + defaults). + +.. signal:: worker_init + +worker_init +~~~~~~~~~~~ + +Dispatched before the worker is started. + +.. signal:: worker_ready + +worker_ready +~~~~~~~~~~~~ + +Dispatched when the worker is ready to accept work. + +.. signal:: worker_process_init + +worker_process_init +~~~~~~~~~~~~~~~~~~~ + +Dispatched in all pool child processes when they start. + +Note that handlers attached to this signal must not be blocking +for more than 4 seconds, or the process will be killed assuming +it failed to start. + +.. signal:: worker_process_shutdown + +worker_process_shutdown +~~~~~~~~~~~~~~~~~~~~~~~ + +Dispatched in all pool child processes just before they exit. + +Note: There is no guarantee that this signal will be dispatched, +similarly to finally blocks it's impossible to guarantee that handlers +will be called at shutdown, and if called it may be interrupted during. + +Provides arguments: + +* pid + + The pid of the child process that is about to shutdown. + +* exitcode + + The exitcode that will be used when the child process exits. + +.. signal:: worker_shutdown + +worker_shutdown +~~~~~~~~~~~~~~~ + +Dispatched when the worker is about to shut down. + +Beat Signals +------------ + +.. signal:: beat_init + +beat_init +~~~~~~~~~ + +Dispatched when :program:`celery beat` starts (either standalone or embedded). +Sender is the :class:`celery.beat.Service` instance. + +.. 
signal:: beat_embedded_init + +beat_embedded_init +~~~~~~~~~~~~~~~~~~ + +Dispatched in addition to the :signal:`beat_init` signal when :program:`celery +beat` is started as an embedded process. Sender is the +:class:`celery.beat.Service` instance. + +Eventlet Signals +---------------- + +.. signal:: eventlet_pool_started + +eventlet_pool_started +~~~~~~~~~~~~~~~~~~~~~ + +Sent when the eventlet pool has been started. + +Sender is the :class:`celery.concurrency.eventlet.TaskPool` instance. + +.. signal:: eventlet_pool_preshutdown + +eventlet_pool_preshutdown +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sent when the worker shutdown, just before the eventlet pool +is requested to wait for remaining workers. + +Sender is the :class:`celery.concurrency.eventlet.TaskPool` instance. + +.. signal:: eventlet_pool_postshutdown + +eventlet_pool_postshutdown +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sent when the pool has been joined and the worker is ready to shutdown. + +Sender is the :class:`celery.concurrency.eventlet.TaskPool` instance. + +.. signal:: eventlet_pool_apply + +eventlet_pool_apply +~~~~~~~~~~~~~~~~~~~ + +Sent whenever a task is applied to the pool. + +Sender is the :class:`celery.concurrency.eventlet.TaskPool` instance. + +Provides arguments: + +* target + + The target function. + +* args + + Positional arguments. + +* kwargs + + Keyword arguments. + +Logging Signals +--------------- + +.. signal:: setup_logging + +setup_logging +~~~~~~~~~~~~~ + +Celery won't configure the loggers if this signal is connected, +so you can use this to completely override the logging configuration +with your own. + +If you would like to augment the logging configuration setup by +Celery then you can use the :signal:`after_setup_logger` and +:signal:`after_setup_task_logger` signals. + +Provides arguments: + +* loglevel + The level of the logging object. + +* logfile + The name of the logfile. + +* format + The log format string. + +* colorize + Specify if log messages are colored or not. + +.. signal:: after_setup_logger + +after_setup_logger +~~~~~~~~~~~~~~~~~~ + +Sent after the setup of every global logger (not task loggers). +Used to augment logging configuration. + +Provides arguments: + +* logger + The logger object. + +* loglevel + The level of the logging object. + +* logfile + The name of the logfile. + +* format + The log format string. + +* colorize + Specify if log messages are colored or not. + +.. signal:: after_setup_task_logger + +after_setup_task_logger +~~~~~~~~~~~~~~~~~~~~~~~ + +Sent after the setup of every single task logger. +Used to augment logging configuration. + +Provides arguments: + +* logger + The logger object. + +* loglevel + The level of the logging object. + +* logfile + The name of the logfile. + +* format + The log format string. + +* colorize + Specify if log messages are colored or not. + +Command signals +--------------- + +.. signal:: user_preload_options + +user_preload_options +~~~~~~~~~~~~~~~~~~~~ + +This signal is sent after any of the Celery command line programs +are finished parsing the user preload options. + +It can be used to add additional command-line arguments to the +:program:`celery` umbrella command: + +.. 
code-block:: python + + from celery import Celery + from celery import signals + from celery.bin.base import Option + + app = Celery() + app.user_options['preload'].add(Option( + '--monitoring', action='store_true', + help='Enable our external monitoring utility, blahblah', + )) + + @signals.user_preload_options.connect + def handle_preload_options(options, **kwargs): + if options['monitoring']: + enable_monitoring() + + +Sender is the :class:`~celery.bin.base.Command` instance, which depends +on what program was called (e.g. for the umbrella command it will be +a :class:`~celery.bin.celery.CeleryCommand` object). + +Provides arguments: + +* app + + The app instance. + +* options + + Mapping of the parsed user preload options (with default values). + +Deprecated Signals +------------------ + +.. signal:: task_sent + +task_sent +~~~~~~~~~ + +This signal is deprecated, please use :signal:`after_task_publish` instead. diff --git a/docs/userguide/tasks.rst b/docs/userguide/tasks.rst new file mode 100644 index 0000000..02075a5 --- /dev/null +++ b/docs/userguide/tasks.rst @@ -0,0 +1,1610 @@ +.. _guide-tasks: + +======= + Tasks +======= + +Tasks are the building blocks of Celery applications. + +A task is a class that can be created out of any callable. It performs +dual roles in that it defines both what happens when a task is +called (sends a message), and what happens when a worker receives that message. + +Every task class has a unique name, and this name is referenced in messages +so that the worker can find the right function to execute. + +A task message does not disappear +until the message has been :term:`acknowledged` by a worker. A worker can reserve +many messages in advance and even if the worker is killed -- caused by power failure +or otherwise -- the message will be redelivered to another worker. + +Ideally task functions should be :term:`idempotent`, which means that +the function will not cause unintended effects even if called +multiple times with the same arguments. +Since the worker cannot detect if your tasks are idempotent, the default +behavior is to acknowledge the message in advance, before it's executed, +so that a task that has already been started is never executed again. + +If your task is idempotent you can set the :attr:`acks_late` option +to have the worker acknowledge the message *after* the task returns +instead. See also the FAQ entry :ref:`faq-acks_late-vs-retry`. + +-- + +In this chapter you will learn all about defining tasks, +and this is the **table of contents**: + +.. contents:: + :local: + :depth: 1 + + +.. _task-basics: + +Basics +====== + +You can easily create a task from any callable by using +the :meth:`~@Celery.task` decorator: + +.. code-block:: python + + from .models import User + + @app.task + def create_user(username, password): + User.objects.create(username=username, password=password) + + +There are also many :ref:`options <task-options>` that can be set for the task; +these can be specified as arguments to the decorator: + +.. code-block:: python + + @app.task(serializer='json') + def create_user(username, password): + User.objects.create(username=username, password=password) + + + +.. sidebar:: How do I import the task decorator? And what is "app"? + + The task decorator is available on your :class:`@Celery` application instance, + if you don't know what that is then please read :ref:`first-steps`.
+ + If you're using Django or are still using the "old" module based celery API, + then you can import the task decorator like this:: + + from celery import task + + @task + def add(x, y): + return x + y + +.. sidebar:: Multiple decorators + + When using multiple decorators in combination with the task + decorator you must make sure that the `task` + decorator is applied last (which in Python oddly means that it must + be the first in the list): + + .. code-block:: python + + @app.task + @decorator2 + @decorator1 + def add(x, y): + return x + y + +.. _task-names: + +Names +===== + +Every task must have a unique name, and a new name +will be generated out of the function name if a custom name is not provided. + +For example: + +.. code-block:: python + + >>> @app.task(name='sum-of-two-numbers') + >>> def add(x, y): + ... return x + y + + >>> add.name + 'sum-of-two-numbers' + +A best practice is to use the module name as a namespace, +this way names won't collide if there's already a task with that name +defined in another module. + +.. code-block:: python + + >>> @app.task(name='tasks.add') + >>> def add(x, y): + ... return x + y + +You can tell the name of the task by investigating its name attribute:: + + >>> add.name + 'tasks.add' + +Which is exactly the name that would have been generated anyway, +if the module name is "tasks.py": + +:file:`tasks.py`: + +.. code-block:: python + + @app.task + def add(x, y): + return x + y + + >>> from tasks import add + >>> add.name + 'tasks.add' + +.. _task-naming-relative-imports: + +Automatic naming and relative imports +------------------------------------- + +Relative imports and automatic name generation does not go well together, +so if you're using relative imports you should set the name explicitly. + +For example if the client imports the module "myapp.tasks" as ".tasks", and +the worker imports the module as "myapp.tasks", the generated names won't match +and an :exc:`~@NotRegistered` error will be raised by the worker. + +This is also the case when using Django and using `project.myapp`-style +naming in ``INSTALLED_APPS``: + +.. code-block:: python + + INSTALLED_APPS = ['project.myapp'] + +If you install the app under the name ``project.myapp`` then the +tasks module will be imported as ``project.myapp.tasks``, +so you must make sure you always import the tasks using the same name: + +.. code-block:: python + + >>> from project.myapp.tasks import mytask # << GOOD + + >>> from myapp.tasks import mytask # << BAD!!! + +The second example will cause the task to be named differently +since the worker and the client imports the modules under different names: + +.. code-block:: python + + >>> from project.myapp.tasks import mytask + >>> mytask.name + 'project.myapp.tasks.mytask' + + >>> from myapp.tasks import mytask + >>> mytask.name + 'myapp.tasks.mytask' + +So for this reason you must be consistent in how you +import modules, which is also a Python best practice. + +Similarly, you should not use old-style relative imports: + +.. code-block:: python + + from module import foo # BAD! + + from proj.module import foo # GOOD! + +New-style relative imports are fine and can be used: + +.. code-block:: python + + from .module import foo # GOOD! + +If you want to use Celery with a project already using these patterns +extensively and you don't have the time to refactor the existing code +then you can consider specifying the names explicitly instead of relying +on the automatic naming: + +.. 
code-block:: python + + @task(name='proj.tasks.add') + def add(x, y): + return x + y + +.. _task-request-info: + +Context +======= + +:attr:`~@Task.request` contains information and state related to +the executing task. + +The request defines the following attributes: + +:id: The unique id of the executing task. + +:group: The unique id a group, if this task is a member. + +:chord: The unique id of the chord this task belongs to (if the task + is part of the header). + +:args: Positional arguments. + +:kwargs: Keyword arguments. + +:retries: How many times the current task has been retried. + An integer starting at `0`. + +:is_eager: Set to :const:`True` if the task is executed locally in + the client, and not by a worker. + +:eta: The original ETA of the task (if any). + This is in UTC time (depending on the :setting:`CELERY_ENABLE_UTC` + setting). + +:expires: The original expiry time of the task (if any). + This is in UTC time (depending on the :setting:`CELERY_ENABLE_UTC` + setting). + +:logfile: The file the worker logs to. See `Logging`_. + +:loglevel: The current log level used. + +:hostname: Hostname of the worker instance executing the task. + +:delivery_info: Additional message delivery information. This is a mapping + containing the exchange and routing key used to deliver this + task. Used by e.g. :meth:`~@Task.retry` + to resend the task to the same destination queue. + Availability of keys in this dict depends on the + message broker used. + +:called_directly: This flag is set to true if the task was not + executed by the worker. + +:callbacks: A list of subtasks to be called if this task returns successfully. + +:errback: A list of subtasks to be called if this task fails. + +:utc: Set to true the caller has utc enabled (:setting:`CELERY_ENABLE_UTC`). + + +.. versionadded:: 3.1 + +:headers: Mapping of message headers (may be :const:`None`). + +:reply_to: Where to send reply to (queue name). + +:correlation_id: Usually the same as the task id, often used in amqp + to keep track of what a reply is for. + + +An example task accessing information in the context is: + +.. code-block:: python + + @app.task(bind=True) + def dump_context(self, x, y): + print('Executing task id {0.id}, args: {0.args!r} kwargs: {0.kwargs!r}'.format( + self.request)) + + +The ``bind`` argument means that the function will be a "bound method" so +that you can access attributes and methods on the task type instance. + +.. _task-logging: + +Logging +======= + +The worker will automatically set up logging for you, or you can +configure logging manually. + +A special logger is available named "celery.task", you can inherit +from this logger to automatically get the task name and unique id as part +of the logs. + +The best practice is to create a common logger +for all of your tasks at the top of your module: + +.. code-block:: python + + from celery.utils.log import get_task_logger + + logger = get_task_logger(__name__) + + @app.task + def add(x, y): + logger.info('Adding {0} + {1}'.format(x, y)) + return x + y + +Celery uses the standard Python logger library, +for which documentation can be found in the :mod:`logging` +module. + +You can also use :func:`print`, as anything written to standard +out/-err will be redirected to logging system (you can disable this, +see :setting:`CELERY_REDIRECT_STDOUTS`). + +.. _task-retry: + +Retrying +======== + +:meth:`~@Task.retry` can be used to re-execute the task, +for example in the event of recoverable errors. 
+ +When you call ``retry`` it will send a new message, using the same +task-id, and it will take care to make sure the message is delivered +to the same queue as the originating task. + +When a task is retried this is also recorded as a task state, +so that you can track the progress of the task using the result +instance (see :ref:`task-states`). + +Here's an example using ``retry``: + +.. code-block:: python + + @app.task(bind=True) + def send_twitter_status(self, oauth, tweet): + try: + twitter = Twitter(oauth) + twitter.update_status(tweet) + except (Twitter.FailWhaleError, Twitter.LoginError) as exc: + raise self.retry(exc=exc) + +.. note:: + + The :meth:`~@Task.retry` call will raise an exception so any code after the retry + will not be reached. This is the :exc:`~@Retry` + exception, it is not handled as an error but rather as a semi-predicate + to signify to the worker that the task is to be retried, + so that it can store the correct state when a result backend is enabled. + + This is normal operation and always happens unless the + ``throw`` argument to retry is set to :const:`False`. + +The bind argument to the task decorator will give access to ``self`` (the +task type instance). + +The ``exc`` method is used to pass exception information that is +used in logs, and when storing task results. +Both the exception and the traceback will +be available in the task state (if a result backend is enabled). + +If the task has a ``max_retries`` value the current exception +will be re-raised if the max number of retries has been exceeded, +but this will not happen if: + +- An ``exc`` argument was not given. + + In this case the :exc:`~@MaxRetriesExceeded` + exception will be raised. + +- There is no current exception + + If there's no original exception to re-raise the ``exc`` + argument will be used instead, so: + + .. code-block:: python + + self.retry(exc=Twitter.LoginError()) + + will raise the ``exc`` argument given. + +.. _task-retry-custom-delay: + +Using a custom retry delay +-------------------------- + +When a task is to be retried, it can wait for a given amount of time +before doing so, and the default delay is defined by the +:attr:`~@Task.default_retry_delay` +attribute. By default this is set to 3 minutes. Note that the +unit for setting the delay is in seconds (int or float). + +You can also provide the `countdown` argument to :meth:`~@Task.retry` to +override this default. + +.. code-block:: python + + @app.task(bind=True, default_retry_delay=30 * 60) # retry in 30 minutes. + def add(self, x, y): + try: + … + except Exception as exc: + raise self.retry(exc=exc, countdown=60) # override the default and + # retry in 1 minute + +.. _task-options: + +List of Options +=============== + +The task decorator can take a number of options that change the way +the task behaves, for example you can set the rate limit for a task +using the :attr:`rate_limit` option. + +Any keyword argument passed to the task decorator will actually be set +as an attribute of the resulting task class, and this is a list +of the built-in attributes. + +General +------- + +.. _task-general-options: + +.. attribute:: Task.name + + The name the task is registered as. + + You can set this name manually, or a name will be + automatically generated using the module and class name. See + :ref:`task-names`. + +.. attribute:: Task.request + + If the task is being executed this will contain information + about the current request. Thread local storage is used. + + See :ref:`task-request-info`. + +.. 
attribute:: Task.abstract + + Abstract classes are not registered, but are used as the + base class for new task types. + +.. attribute:: Task.max_retries + + The maximum number of attempted retries before giving up. + If the number of retries exceeds this value a :exc:`~@MaxRetriesExceeded` + exception will be raised. *NOTE:* You have to call :meth:`~@Task.retry` + manually, as it will not automatically retry on exception. + + The default value is 3. + A value of :const:`None` will disable the retry limit and the + task will retry forever until it succeeds. + +.. attribute:: Task.throws + + Optional tuple of expected error classes that should not be regarded + as an actual error. + + Errors in this list will be reported as a failure to the result backend, + but the worker will not log the event as an error, and no traceback will + be included. + + Example: + + .. code-block:: python + + @task(throws=(KeyError, HttpNotFound)) + def get_foo(): + something() + + Error types: + + - Expected errors (in ``Task.throws``) + + Logged with severity ``INFO``, traceback excluded. + + - Unexpected errors + + Logged with severity ``ERROR``, with traceback included. + +.. attribute:: Task.default_retry_delay + + Default time in seconds before a retry of the task + should be executed. Can be either :class:`int` or :class:`float`. + Default is a 3 minute delay. + +.. attribute:: Task.rate_limit + + Set the rate limit for this task type which limits the number of tasks + that can be run in a given time frame. Tasks will still complete when + a rate limit is in effect, but it may take some time before it's allowed to + start. + + If this is :const:`None` no rate limit is in effect. + If it is an integer or float, it is interpreted as "tasks per second". + + The rate limits can be specified in seconds, minutes or hours + by appending `"/s"`, `"/m"` or `"/h"` to the value. Tasks will be evenly + distributed over the specified time frame. + + Example: `"100/m"` (a hundred tasks a minute). This will enforce a minimum + delay of 600ms between starting two tasks on the same worker instance. + + Default is the :setting:`CELERY_DEFAULT_RATE_LIMIT` setting, + which if not specified means rate limiting for tasks is disabled by default. + + Note that this is a *per worker instance* rate limit, and not a global + rate limit. To enforce a global rate limit (e.g. for an API with a + maximum number of requests per second), you must restrict to a given + queue. + +.. attribute:: Task.time_limit + + The hard time limit, in seconds, for this task. If not set then the worker's default + will be used. + +.. attribute:: Task.soft_time_limit + + The soft time limit for this task. If not set then the worker's default + will be used. + +.. attribute:: Task.ignore_result + + Don't store task state. Note that this means you can't use + :class:`~celery.result.AsyncResult` to check if the task is ready, + or get its return value. + +.. attribute:: Task.store_errors_even_if_ignored + + If :const:`True`, errors will be stored even if the task is configured + to ignore results. + +.. attribute:: Task.send_error_emails + + Send an email whenever a task of this type fails. + Defaults to the :setting:`CELERY_SEND_TASK_ERROR_EMAILS` setting. + See :ref:`conf-error-mails` for more information. + +.. attribute:: Task.ErrorMail + + If the sending of error emails is enabled for this task, then + this is the class defining the logic to send error mails. + +..
attribute:: Task.serializer + + A string identifying the default serialization + method to use. Defaults to the :setting:`CELERY_TASK_SERIALIZER` + setting. Can be `pickle` `json`, `yaml`, or any custom + serialization methods that have been registered with + :mod:`kombu.serialization.registry`. + + Please see :ref:`calling-serializers` for more information. + +.. attribute:: Task.compression + + A string identifying the default compression scheme to use. + + Defaults to the :setting:`CELERY_MESSAGE_COMPRESSION` setting. + Can be `gzip`, or `bzip2`, or any custom compression schemes + that have been registered with the :mod:`kombu.compression` registry. + + Please see :ref:`calling-compression` for more information. + +.. attribute:: Task.backend + + The result store backend to use for this task. Defaults to the + :setting:`CELERY_RESULT_BACKEND` setting. + +.. attribute:: Task.acks_late + + If set to :const:`True` messages for this task will be acknowledged + **after** the task has been executed, not *just before*, which is + the default behavior. + + Note that this means the task may be executed twice if the worker + crashes in the middle of execution, which may be acceptable for some + applications. + + The global default can be overridden by the :setting:`CELERY_ACKS_LATE` + setting. + +.. _task-track-started: + +.. attribute:: Task.track_started + + If :const:`True` the task will report its status as "started" + when the task is executed by a worker. + The default value is :const:`False` as the normal behaviour is to not + report that level of granularity. Tasks are either pending, finished, + or waiting to be retried. Having a "started" status can be useful for + when there are long running tasks and there is a need to report which + task is currently running. + + The host name and process id of the worker executing the task + will be available in the state metadata (e.g. `result.info['pid']`) + + The global default can be overridden by the + :setting:`CELERY_TRACK_STARTED` setting. + + +.. seealso:: + + The API reference for :class:`~@Task`. + +.. _task-states: + +States +====== + +Celery can keep track of the tasks current state. The state also contains the +result of a successful task, or the exception and traceback information of a +failed task. + +There are several *result backends* to choose from, and they all have +different strengths and weaknesses (see :ref:`task-result-backends`). + +During its lifetime a task will transition through several possible states, +and each state may have arbitrary metadata attached to it. When a task +moves into a new state the previous state is +forgotten about, but some transitions can be deducted, (e.g. a task now +in the :state:`FAILED` state, is implied to have been in the +:state:`STARTED` state at some point). + +There are also sets of states, like the set of +:state:`FAILURE_STATES`, and the set of :state:`READY_STATES`. + +The client uses the membership of these sets to decide whether +the exception should be re-raised (:state:`PROPAGATE_STATES`), or whether +the state can be cached (it can if the task is ready). + +You can also define :ref:`custom-states`. + +.. _task-result-backends: + +Result Backends +--------------- + +If you want to keep track of tasks or need the return values, then Celery +must store or send the states somewhere so that they can be retrieved later. +There are several built-in result backends to choose from: SQLAlchemy/Django ORM, +Memcached, RabbitMQ (amqp), MongoDB, and Redis -- or you can define your own. 
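+ +As an illustration only (a minimal sketch, assuming a local Redis server and the :setting:`CELERY_RESULT_BACKEND` setting mentioned above), picking a backend can be as simple as: + +.. code-block:: python + + from celery import Celery + + # Assumption: a Redis server is running on localhost; any supported + # backend URL (database, cache, amqp, and so on) could be used instead. + app = Celery('proj', broker='amqp://', backend='redis://localhost:6379/0') + + # Equivalently, through the configuration: + app.conf.CELERY_RESULT_BACKEND = 'redis://localhost:6379/0'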
+ +No backend works well for every use case. +You should read about the strengths and weaknesses of each backend, and choose +the most appropriate for your needs. + + +.. seealso:: + + :ref:`conf-result-backend` + +RabbitMQ Result Backend +~~~~~~~~~~~~~~~~~~~~~~~ + +The RabbitMQ result backend (amqp) is special as it does not actually *store* +the states, but rather sends them as messages. This is an important difference as it +means that a result *can only be retrieved once*; If you have two processes +waiting for the same result, one of the processes will never receive the +result! + +Even with that limitation, it is an excellent choice if you need to receive +state changes in real-time. Using messaging means the client does not have to +poll for new states. + +There are several other pitfalls you should be aware of when using the +RabbitMQ result backend: + +* Every new task creates a new queue on the server, with thousands of tasks + the broker may be overloaded with queues and this will affect performance in + negative ways. If you're using RabbitMQ then each queue will be a separate + Erlang process, so if you're planning to keep many results simultaneously you + may have to increase the Erlang process limit, and the maximum number of file + descriptors your OS allows. + +* Old results will be cleaned automatically, based on the + :setting:`CELERY_TASK_RESULT_EXPIRES` setting. By default this is set to + expire after 1 day: if you have a very busy cluster you should lower + this value. + +For a list of options supported by the RabbitMQ result backend, please see +:ref:`conf-amqp-result-backend`. + + +Database Result Backend +~~~~~~~~~~~~~~~~~~~~~~~ + +Keeping state in the database can be convenient for many, especially for +web applications with a database already in place, but it also comes with +limitations. + +* Polling the database for new states is expensive, and so you should + increase the polling intervals of operations such as `result.get()`. + +* Some databases use a default transaction isolation level that + is not suitable for polling tables for changes. + + In MySQL the default transaction isolation level is `REPEATABLE-READ`, which + means the transaction will not see changes by other transactions until the + transaction is committed. It is recommended that you change to the + `READ-COMMITTED` isolation level. + + +.. _task-builtin-states: + +Built-in States +--------------- + +.. state:: PENDING + +PENDING +~~~~~~~ + +Task is waiting for execution or unknown. +Any task id that is not known is implied to be in the pending state. + +.. state:: STARTED + +STARTED +~~~~~~~ + +Task has been started. +Not reported by default, to enable please see :attr:`@Task.track_started`. + +:metadata: `pid` and `hostname` of the worker process executing + the task. + +.. state:: SUCCESS + +SUCCESS +~~~~~~~ + +Task has been successfully executed. + +:metadata: `result` contains the return value of the task. +:propagates: Yes +:ready: Yes + +.. state:: FAILURE + +FAILURE +~~~~~~~ + +Task execution resulted in failure. + +:metadata: `result` contains the exception occurred, and `traceback` + contains the backtrace of the stack at the point when the + exception was raised. +:propagates: Yes + +.. state:: RETRY + +RETRY +~~~~~ + +Task is being retried. + +:metadata: `result` contains the exception that caused the retry, + and `traceback` contains the backtrace of the stack at the point + when the exceptions was raised. +:propagates: No + +.. 
state:: REVOKED + +REVOKED +~~~~~~~ + +Task has been revoked. + +:propagates: Yes + +.. _custom-states: + +Custom states +------------- + +You can easily define your own states, all you need is a unique name. +The name of the state is usually an uppercase string. As an example +you could have a look at :mod:`abortable tasks <~celery.contrib.abortable>` +which defines its own custom :state:`ABORTED` state. + +Use :meth:`~@Task.update_state` to update a task's state:: + + @app.task(bind=True) + def upload_files(self, filenames): + for i, file in enumerate(filenames): + self.update_state(state='PROGRESS', + meta={'current': i, 'total': len(filenames)}) + + +Here I created the state `"PROGRESS"`, which tells any application +aware of this state that the task is currently in progress, and also where +it is in the process by having `current` and `total` counts as part of the +state metadata. This can then be used to create e.g. progress bars. + +.. _pickling_exceptions: + +Creating pickleable exceptions +------------------------------ + +A rarely known Python fact is that exceptions must conform to some +simple rules to support being serialized by the pickle module. + +Tasks that raise exceptions that are not pickleable will not work +properly when Pickle is used as the serializer. + +To make sure that your exceptions are pickleable the exception +*MUST* provide the original arguments it was instantiated +with in its ``.args`` attribute. The simplest way +to ensure this is to have the exception call ``Exception.__init__``. + +Let's look at some examples that work, and one that doesn't: + +.. code-block:: python + + + # OK: + class HttpError(Exception): + pass + + # BAD: + class HttpError(Exception): + + def __init__(self, status_code): + self.status_code = status_code + + # OK: + class HttpError(Exception): + + def __init__(self, status_code): + self.status_code = status_code + Exception.__init__(self, status_code) # <-- REQUIRED + + +So the rule is: +For any exception that supports custom arguments ``*args``, +``Exception.__init__(self, *args)`` must be used. + +There is no special support for *keyword arguments*, so if you +want to preserve keyword arguments when the exception is unpickled +you have to pass them as regular args: + +.. code-block:: python + + class HttpError(Exception): + + def __init__(self, status_code, headers=None, body=None): + self.status_code = status_code + self.headers = headers + self.body = body + + super(HttpError, self).__init__(status_code, headers, body) + +.. _task-semipredicates: + +Semipredicates +============== + +The worker wraps the task in a tracing function which records the final +state of the task. There are a number of exceptions that can be used to +signal this function to change how it treats the return of the task. + +.. _task-semipred-ignore: + +Ignore +------ + +The task may raise :exc:`~@Ignore` to force the worker to ignore the +task. This means that no state will be recorded for the task, but the +message is still acknowledged (removed from queue). + +This can be used if you want to implement custom revoke-like +functionality, or manually store the result of a task. + +Example keeping revoked tasks in a Redis set: + +.. code-block:: python + + from celery.exceptions import Ignore + + @app.task(bind=True) + def some_task(self): + if redis.ismember('tasks.revoked', self.request.id): + raise Ignore() + +Example that stores results manually: + +.. 
code-block:: python + + from celery import states + from celery.exceptions import Ignore + + @app.task(bind=True) + def get_tweets(self, user): + timeline = twitter.get_timeline(user) + self.update_state(state=states.SUCCESS, meta=timeline) + raise Ignore() + +.. _task-semipred-reject: + +Reject +------ + +The task may raise :exc:`~@Reject` to reject the task message using +AMQPs ``basic_reject`` method. This will not have any effect unless +:attr:`Task.acks_late` is enabled. + +Rejecting a message has the same effect as acking it, but some +brokers may implement additional functionality that can be used. +For example RabbitMQ supports the concept of `Dead Letter Exchanges`_ +where a queue can be configured to use a dead letter exchange that rejected +messages are redelivered to. + +.. _`Dead Letter Exchanges`: http://www.rabbitmq.com/dlx.html + +Reject can also be used to requeue messages, but please be very careful +when using this as it can easily result in an infinite message loop. + +Example using reject when a task causes an out of memory condition: + +.. code-block:: python + + import errno + from celery.exceptions import Reject + + @app.task(bind=True, acks_late=True) + def render_scene(self, path): + file = get_file(path) + try: + renderer.render_scene(file) + + # if the file is too big to fit in memory + # we reject it so that it's redelivered to the dead letter exchange + # and we can manually inspect the situation. + except MemoryError as exc: + raise Reject(exc, requeue=False) + except OSError as exc: + if exc.errno == errno.ENOMEM: + raise Reject(exc, requeue=False) + + # For any other error we retry after 10 seconds. + except Exception as exc: + raise self.retry(exc, countdown=10) + +Example requeuing the message: + +.. code-block:: python + + from celery.exceptions import Reject + + @app.task(bind=True, acks_late=True) + def requeues(self): + if not self.request.delivery_info['redelivered']: + raise Reject('no reason', requeue=True) + print('received two times') + +Consult your broker documentation for more details about the ``basic_reject`` +method. + + +.. _task-semipred-retry: + +Retry +----- + +The :exc:`~@Retry` exception is raised by the ``Task.retry`` method +to tell the worker that the task is being retried. + +.. _task-custom-classes: + +Custom task classes +=================== + +All tasks inherit from the :class:`@Task` class. +The :meth:`~@Task.run` method becomes the task body. + +As an example, the following code, + +.. code-block:: python + + @app.task + def add(x, y): + return x + y + + +will do roughly this behind the scenes: + +.. code-block:: python + + class _AddTask(app.Task): + + def run(self, x, y): + return x + y + add = app.tasks[_AddTask.name] + + +Instantiation +------------- + +A task is **not** instantiated for every request, but is registered +in the task registry as a global instance. + +This means that the ``__init__`` constructor will only be called +once per process, and that the task class is semantically closer to an +Actor. + +If you have a task, + +.. code-block:: python + + from celery import Task + + class NaiveAuthenticateServer(Task): + + def __init__(self): + self.users = {'george': 'password'} + + def run(self, username, password): + try: + return self.users[username] == password + except KeyError: + return False + +And you route every request to the same process, then it +will keep state between requests. + +This can also be useful to cache resources, +e.g. a base Task class that caches a database connection: + +.. 
code-block:: python + + from celery import Task + + class DatabaseTask(Task): + abstract = True + _db = None + + @property + def db(self): + if self._db is None: + self._db = Database.connect() + return self._db + + +that can be added to tasks like this: + +.. code-block:: python + + + @app.task(base=DatabaseTask) + def process_rows(): + for row in process_rows.db.table.all(): + … + +The ``db`` attribute of the ``process_rows`` task will then +always stay the same in each process. + +Abstract classes +---------------- + +Abstract classes are not registered, but are used as the +base class for new task types. + +.. code-block:: python + + from celery import Task + + class DebugTask(Task): + abstract = True + + def after_return(self, *args, **kwargs): + print('Task returned: {0!r}'.format(self.request)) + + + @app.task(base=DebugTask) + def add(x, y): + return x + y + + +Handlers +-------- + +.. method:: after_return(self, status, retval, task_id, args, kwargs, einfo) + + Handler called after the task returns. + + :param status: Current task state. + :param retval: Task return value/exception. + :param task_id: Unique id of the task. + :param args: Original arguments for the task that returned. + :param kwargs: Original keyword arguments for the task + that returned. + + :keyword einfo: :class:`~celery.datastructures.ExceptionInfo` + instance, containing the traceback (if any). + + The return value of this handler is ignored. + +.. method:: on_failure(self, exc, task_id, args, kwargs, einfo) + + This is run by the worker when the task fails. + + :param exc: The exception raised by the task. + :param task_id: Unique id of the failed task. + :param args: Original arguments for the task that failed. + :param kwargs: Original keyword arguments for the task + that failed. + + :keyword einfo: :class:`~celery.datastructures.ExceptionInfo` + instance, containing the traceback. + + The return value of this handler is ignored. + +.. method:: on_retry(self, exc, task_id, args, kwargs, einfo) + + This is run by the worker when the task is to be retried. + + :param exc: The exception sent to :meth:`~@Task.retry`. + :param task_id: Unique id of the retried task. + :param args: Original arguments for the retried task. + :param kwargs: Original keyword arguments for the retried task. + + :keyword einfo: :class:`~celery.datastructures.ExceptionInfo` + instance, containing the traceback. + + The return value of this handler is ignored. + +.. method:: on_success(self, retval, task_id, args, kwargs) + + Run by the worker if the task executes successfully. + + :param retval: The return value of the task. + :param task_id: Unique id of the executed task. + :param args: Original arguments for the executed task. + :param kwargs: Original keyword arguments for the executed task. + + The return value of this handler is ignored. + +.. _task-how-they-work: + +How it works +============ + +Here come the technical details; this part isn't something you need to know, +but you may be interested. + +All defined tasks are listed in a registry. The registry contains +a list of task names and their task classes. You can investigate this registry +yourself: + +.. code-block:: python + + >>> from proj.celery import app + >>> app.tasks + {'celery.chord_unlock': + <@task: celery.chord_unlock>, + 'celery.backend_cleanup': + <@task: celery.backend_cleanup>, + 'celery.chord': + <@task: celery.chord>} + +This is the list of tasks built into Celery.
Note that tasks +will only be registered when the module they are defined in is imported. + +The default loader imports any modules listed in the +:setting:`CELERY_IMPORTS` setting. + +The entity responsible for registering your task in the registry is the +metaclass: :class:`~celery.task.base.TaskType`. + +If you want to register your task manually you can mark the +task as :attr:`~@Task.abstract`: + +.. code-block:: python + + class MyTask(Task): + abstract = True + +This way the task won't be registered, but any task inheriting from +it will be. + +When tasks are sent, no actual function code is sent with it, just the name +of the task to execute. When the worker then receives the message it can look +up the name in its task registry to find the execution code. + +This means that your workers should always be updated with the same software +as the client. This is a drawback, but the alternative is a technical +challenge that has yet to be solved. + +.. _task-best-practices: + +Tips and Best Practices +======================= + +.. _task-ignore_results: + +Ignore results you don't want +----------------------------- + +If you don't care about the results of a task, be sure to set the +:attr:`~@Task.ignore_result` option, as storing results +wastes time and resources. + +.. code-block:: python + + @app.task(ignore_result=True) + def mytask(…): + something() + +Results can even be disabled globally using the :setting:`CELERY_IGNORE_RESULT` +setting. + +.. _task-disable-rate-limits: + +Disable rate limits if they're not used +--------------------------------------- + +Disabling rate limits altogether is recommended if you don't have +any tasks using them. This is because the rate limit subsystem introduces +quite a lot of complexity. + +Set the :setting:`CELERY_DISABLE_RATE_LIMITS` setting to globally disable +rate limits: + +.. code-block:: python + + CELERY_DISABLE_RATE_LIMITS = True + +You find additional optimization tips in the +:ref:`Optimizing Guide `. + +.. _task-synchronous-subtasks: + +Avoid launching synchronous subtasks +------------------------------------ + +Having a task wait for the result of another task is really inefficient, +and may even cause a deadlock if the worker pool is exhausted. + +Make your design asynchronous instead, for example by using *callbacks*. + +**Bad**: + +.. code-block:: python + + @app.task + def update_page_info(url): + page = fetch_page.delay(url).get() + info = parse_page.delay(url, page).get() + store_page_info.delay(url, info) + + @app.task + def fetch_page(url): + return myhttplib.get(url) + + @app.task + def parse_page(url, page): + return myparser.parse_document(page) + + @app.task + def store_page_info(url, info): + return PageInfo.objects.create(url, info) + + +**Good**: + +.. code-block:: python + + def update_page_info(url): + # fetch_page -> parse_page -> store_page + chain = fetch_page.s() | parse_page.s() | store_page_info.s(url) + chain() + + @app.task() + def fetch_page(url): + return myhttplib.get(url) + + @app.task() + def parse_page(page): + return myparser.parse_document(page) + + @app.task(ignore_result=True) + def store_page_info(info, url): + PageInfo.objects.create(url=url, info=info) + + +Here I instead created a chain of tasks by linking together +different :func:`~celery.subtask`'s. +You can read about chains and other powerful constructs +at :ref:`designing-workflows`. + +.. _task-performance-and-strategies: + +Performance and Strategies +========================== + +.. 
_task-granularity: + +Granularity +----------- + +The task granularity is the amount of computation needed by each subtask. +In general it is better to split the problem up into many small tasks than +to have a few long-running tasks. + +With smaller tasks you can process more tasks in parallel and the tasks +won't run long enough to block the worker from processing other waiting tasks. + +However, executing a task does have overhead. A message needs to be sent, data +may not be local, etc. So if the tasks are too fine-grained the additional +overhead may not be worth it in the end. + +.. seealso:: + + The book `Art of Concurrency`_ has a section dedicated to the topic + of task granularity [AOC1]_. + +.. _`Art of Concurrency`: http://oreilly.com/catalog/9780596521547 + +.. [AOC1] Breshears, Clay. Section 2.2.1, "The Art of Concurrency". + O'Reilly Media, Inc. May 15, 2009. ISBN-13 978-0-596-52153-0. + +.. _task-data-locality: + +Data locality +------------- + +The worker processing the task should be as close to the data as +possible. The best would be to have a copy in memory, the worst would be a +full transfer from another continent. + +If the data is far away, you could try to run another worker at that location, +or if that's not possible, cache often-used data, or preload data you know +is going to be used. + +The easiest way to share data between workers is to use a distributed cache +system, like `memcached`_. + +.. seealso:: + + The paper `Distributed Computing Economics`_ by Jim Gray is an excellent + introduction to the topic of data locality. + +.. _`Distributed Computing Economics`: + http://research.microsoft.com/pubs/70001/tr-2003-24.pdf + +.. _`memcached`: http://memcached.org/ + +.. _task-state: + +State +----- + +Since Celery is a distributed system, you can't know in which process, or +on what machine the task will be executed. You can't even know if the task will +run in a timely manner. + +The ancient async sayings tell us that “asserting the world is the +responsibility of the task”. What this means is that the world view may +have changed since the task was requested, so the task is responsible for +making sure the world is how it should be; if you have a task +that re-indexes a search engine, and the search engine should only be +re-indexed at maximum every 5 minutes, then it must be the task's +responsibility to assert that, not the caller's. + +Another gotcha is Django model objects. They shouldn't be passed on as +arguments to tasks. It's almost always better to re-fetch the object from +the database when the task is running instead, as using old data may lead +to race conditions. + +Imagine the following scenario where you have an article and a task +that automatically expands some abbreviations in it: + +.. code-block:: python + + class Article(models.Model): + title = models.CharField() + body = models.TextField() + + @app.task + def expand_abbreviations(article): + article.body = article.body.replace('MyCorp', 'My Corporation') + article.save() + +First, an author creates an article and saves it, then the author +clicks on a button that initiates the abbreviation task:: + + >>> article = Article.objects.get(id=102) + >>> expand_abbreviations.delay(article) + +Now, the queue is very busy, so the task won't be run for another 2 minutes. +In the meantime another author makes changes to the article, so +when the task is finally run, the body of the article is reverted to the old +version because the task had the old body in its argument.
+ +Fixing the race condition is easy: just use the article id instead, and +re-fetch the article in the task body: + +.. code-block:: python + + @app.task + def expand_abbreviations(article_id): + article = Article.objects.get(id=article_id) + article.body = article.body.replace('MyCorp', 'My Corporation') + article.save() + + >>> expand_abbreviations.delay(article_id) + +There might even be performance benefits to this approach, as sending large +messages may be expensive. + +.. _task-database-transactions: + +Database transactions +--------------------- + +Let's have a look at another example: + +.. code-block:: python + + from django.db import transaction + + @transaction.commit_on_success + def create_article(request): + article = Article.objects.create(…) + expand_abbreviations.delay(article.pk) + +This is a Django view creating an article object in the database, +then passing the primary key to a task. It uses the `commit_on_success` +decorator, which will commit the transaction when the view returns, or +roll back if the view raises an exception. + +There is a race condition if the task starts executing +before the transaction has been committed; the database object does not exist +yet! + +The solution is to *always commit transactions before sending tasks +depending on state from the current transaction*: + +.. code-block:: python + + @transaction.commit_manually + def create_article(request): + try: + article = Article.objects.create(…) + except: + transaction.rollback() + raise + else: + transaction.commit() + expand_abbreviations.delay(article.pk) + +.. _task-example: + +Example +======= + +Let's take a real world example: a blog where posted comments need to be +filtered for spam. When the comment is created, the spam filter runs in the +background, so the user doesn't have to wait for it to finish. + +I have a Django blog application allowing comments +on blog posts. I'll describe parts of the models/views and tasks for this +application. + +blog/models.py +-------------- + +The comment model looks like this: + +.. code-block:: python + + from django.db import models + from django.utils.translation import ugettext_lazy as _ + + + class Comment(models.Model): + name = models.CharField(_('name'), max_length=64) + email_address = models.EmailField(_('email address')) + homepage = models.URLField(_('home page'), + blank=True, verify_exists=False) + comment = models.TextField(_('comment')) + pub_date = models.DateTimeField(_('Published date'), + editable=False, auto_now_add=True) + is_spam = models.BooleanField(_('spam?'), + default=False, editable=False) + + class Meta: + verbose_name = _('comment') + verbose_name_plural = _('comments') + + +In the view where the comment is posted, I first write the comment +to the database, then I launch the spam filter task in the background. + +.. _task-example-blog-views: + +blog/views.py +------------- + +..
code-block:: python + + from django import forms + from django.http import HttpResponseRedirect + from django.template.context import RequestContext + from django.shortcuts import get_object_or_404, render_to_response + + from blog import tasks + from blog.models import Comment + + + class CommentForm(forms.ModelForm): + + class Meta: + model = Comment + + + def add_comment(request, slug, template_name='comments/create.html'): + post = get_object_or_404(Entry, slug=slug) + remote_addr = request.META.get('REMOTE_ADDR') + + if request.method == 'POST': + form = CommentForm(request.POST, request.FILES) + if form.is_valid(): + comment = form.save() + # Check spam asynchronously. + tasks.spam_filter.delay(comment_id=comment.id, + remote_addr=remote_addr) + return HttpResponseRedirect(post.get_absolute_url()) + else: + form = CommentForm() + + context = RequestContext(request, {'form': form}) + return render_to_response(template_name, context_instance=context) + + +To filter spam in comments I use `Akismet`_, the service +used to filter spam in comments posted to the free weblog platform +`Wordpress`. `Akismet`_ is free for personal use, but for commercial use you +need to pay. You have to sign up to their service to get an API key. + +To make API calls to `Akismet`_ I use the `akismet.py`_ library written by +`Michael Foord`_. + +.. _task-example-blog-tasks: + +blog/tasks.py +------------- + +.. code-block:: python + + from celery import Celery + + from akismet import Akismet + + from django.conf import settings + from django.core.exceptions import ImproperlyConfigured + from django.contrib.sites.models import Site + + from blog.models import Comment + + + app = Celery(broker='amqp://') + + + @app.task + def spam_filter(comment_id, remote_addr=None): + logger = spam_filter.get_logger() + logger.info('Running spam filter for comment %s', comment_id) + + comment = Comment.objects.get(pk=comment_id) + current_domain = Site.objects.get_current().domain + akismet = Akismet(settings.AKISMET_KEY, 'http://{0}'.format(current_domain)) + if not akismet.verify_key(): + raise ImproperlyConfigured('Invalid AKISMET_KEY') + + + is_spam = akismet.comment_check(user_ip=remote_addr, + comment_content=comment.comment, + comment_author=comment.name, + comment_author_email=comment.email_address) + if is_spam: + comment.is_spam = True + comment.save() + + return is_spam + +.. _`Akismet`: http://akismet.com/faq/ +.. _`akismet.py`: http://www.voidspace.org.uk/downloads/akismet.py +.. _`Michael Foord`: http://www.voidspace.org.uk/ diff --git a/docs/userguide/workers.rst b/docs/userguide/workers.rst new file mode 100644 index 0000000..cbfe81f --- /dev/null +++ b/docs/userguide/workers.rst @@ -0,0 +1,1173 @@ +.. _guide-workers: + +=============== + Workers Guide +=============== + +.. contents:: + :local: + :depth: 1 + +.. _worker-starting: + +Starting the worker +=================== + +.. sidebar:: Daemonizing + + You probably want to use a daemonization tool to start + the worker in the background. See :ref:`daemonizing` for help + detaching the worker using popular daemonization tools. + +You can start the worker in the foreground by executing the command: + +.. code-block:: bash + + $ celery -A proj worker -l info + +For a full list of available command-line options see +:mod:`~celery.bin.worker`, or simply do: + +.. code-block:: bash + + $ celery worker --help + +You can also start multiple workers on the same machine. If you do so +be sure to give a unique name to each individual worker by specifying a +host name with the :option:`--hostname|-n` argument: + +..
code-block:: bash + + $ celery -A proj worker --loglevel=INFO --concurrency=10 -n worker1.%h + $ celery -A proj worker --loglevel=INFO --concurrency=10 -n worker2.%h + $ celery -A proj worker --loglevel=INFO --concurrency=10 -n worker3.%h + +The hostname argument can expand the following variables: + + - ``%h``: Hostname including domain name. + - ``%n``: Hostname only. + - ``%d``: Domain name only. + +E.g. if the current hostname is ``george.example.com`` then +these will expand to: + + - ``worker1.%h`` -> ``worker1.george.example.com`` + - ``worker1.%n`` -> ``worker1.george`` + - ``worker1.%d`` -> ``worker1.example.com`` + +.. _worker-stopping: + +Stopping the worker +=================== + +Shutdown should be accomplished using the :sig:`TERM` signal. + +When shutdown is initiated the worker will finish all currently executing +tasks before it actually terminates, so if these tasks are important you should +wait for it to finish before doing anything drastic (like sending the :sig:`KILL` +signal). + +If the worker won't shutdown after considerate time, for example because +of tasks stuck in an infinite-loop, you can use the :sig:`KILL` signal to +force terminate the worker, but be aware that currently executing tasks will +be lost (unless the tasks have the :attr:`~@Task.acks_late` +option set). + +Also as processes can't override the :sig:`KILL` signal, the worker will +not be able to reap its children, so make sure to do so manually. This +command usually does the trick: + +.. code-block:: bash + + $ ps auxww | grep 'celery worker' | awk '{print $2}' | xargs kill -9 + +.. _worker-restarting: + +Restarting the worker +===================== + +To restart the worker you should send the `TERM` signal and start a new +instance. The easiest way to manage workers for development +is by using `celery multi`: + + .. code-block:: bash + + $ celery multi start 1 -A proj -l info -c4 --pidfile=/var/run/celery/%n.pid + $ celery multi restart 1 --pidfile=/var/run/celery/%n.pid + +For production deployments you should be using init scripts or other process +supervision systems (see :ref:`daemonizing`). + +Other than stopping then starting the worker to restart, you can also +restart the worker using the :sig:`HUP` signal, but note that the worker +will be responsible for restarting itself so this is prone to problems and +is not recommended in production: + +.. code-block:: bash + + $ kill -HUP $pid + +.. note:: + + Restarting by :sig:`HUP` only works if the worker is running + in the background as a daemon (it does not have a controlling + terminal). + + :sig:`HUP` is disabled on OS X because of a limitation on + that platform. + + +.. _worker-process-signals: + +Process Signals +=============== + +The worker's main process overrides the following signals: + ++--------------+-------------------------------------------------+ +| :sig:`TERM` | Warm shutdown, wait for tasks to complete. | ++--------------+-------------------------------------------------+ +| :sig:`QUIT` | Cold shutdown, terminate ASAP | ++--------------+-------------------------------------------------+ +| :sig:`USR1` | Dump traceback for all active threads. | ++--------------+-------------------------------------------------+ +| :sig:`USR2` | Remote debug, see :mod:`celery.contrib.rdb`. | ++--------------+-------------------------------------------------+ + +.. 
_worker-files: + +Variables in file paths +======================= + +The file path arguments for :option:`--logfile`, :option:`--pidfile` and :option:`--statedb` +can contain variables that the worker will expand: + +Node name replacements +---------------------- + +- ``%h``: Hostname including domain name. +- ``%n``: Hostname only. +- ``%d``: Domain name only. +- ``%i``: Prefork pool process index or 0 if MainProcess. +- ``%I``: Prefork pool process index with separator. + +E.g. if the current hostname is ``george.example.com`` then +these will expand to: + +- ``--logfile=%h.log`` -> :file:`george.example.com.log` +- ``--logfile=%n.log`` -> :file:`george.log` +- ``--logfile=%d`` -> :file:`example.com.log` + +.. _worker-files-process-index: + +Prefork pool process index +-------------------------- + +The prefork pool process index specifiers will expand into a different +filename depending on the process that will eventually need to open the file. + +This can be used to specify one log file per child process. + +Note that the numbers will stay within the process limit even if processes +exit or if autoscale/maxtasksperchild/time limits are used. I.e. the number +is the *process index* not the process count or pid. + +* ``%i`` - Pool process index or 0 if MainProcess. + + Where ``-n worker1@example.com -c2 -f %n-%i.log`` will result in + three log files: + + - :file:`worker1-0.log` (main process) + - :file:`worker1-1.log` (pool process 1) + - :file:`worker1-2.log` (pool process 2) + +* ``%I`` - Pool process index with separator. + + Where ``-n worker1@example.com -c2 -f %n%I.log`` will result in + three log files: + + - :file:`worker1.log` (main process) + - :file:`worker1-1.log`` (pool process 1) + - :file:`worker1-2.log`` (pool process 2) + +.. _worker-concurrency: + +Concurrency +=========== + +By default multiprocessing is used to perform concurrent execution of tasks, +but you can also use :ref:`Eventlet `. The number +of worker processes/threads can be changed using the :option:`--concurrency` +argument and defaults to the number of CPUs available on the machine. + +.. admonition:: Number of processes (multiprocessing/prefork pool) + + More pool processes are usually better, but there's a cut-off point where + adding more pool processes affects performance in negative ways. + There is even some evidence to support that having multiple worker + instances running, may perform better than having a single worker. + For example 3 workers with 10 pool processes each. You need to experiment + to find the numbers that works best for you, as this varies based on + application, work load, task run times and other factors. + +.. _worker-remote-control: + +Remote control +============== + +.. versionadded:: 2.0 + +.. sidebar:: The ``celery`` command + + The :program:`celery` program is used to execute remote control + commands from the command-line. It supports all of the commands + listed below. See :ref:`monitoring-control` for more information. + +pool support: *prefork, eventlet, gevent*, blocking:*threads/solo* (see note) +broker support: *amqp, redis* + +Workers have the ability to be remote controlled using a high-priority +broadcast message queue. The commands can be directed to all, or a specific +list of workers. + +Commands can also have replies. The client can then wait for and collect +those replies. 
Since there's no central authority to know how many +workers are available in the cluster, there is also no way to estimate +how many workers may send a reply, so the client has a configurable +timeout — the deadline in seconds for replies to arrive in. This timeout +defaults to one second. If the worker doesn't reply within the deadline +it doesn't necessarily mean the worker didn't reply, or worse is dead, but +may simply be caused by network latency or the worker being slow at processing +commands, so adjust the timeout accordingly. + +In addition to timeouts, the client can specify the maximum number +of replies to wait for. If a destination is specified, this limit is set +to the number of destination hosts. + +.. note:: + + The solo and threads pools support remote control commands, + but any task executing will block any waiting control command, + so it is of limited use if the worker is very busy. In that + case you must increase the timeout waiting for replies in the client. + +.. _worker-broadcast-fun: + +The :meth:`~@control.broadcast` function. +---------------------------------------------------- + +This is the client function used to send commands to the workers. +Some remote control commands also have higher-level interfaces using +:meth:`~@control.broadcast` in the background, like +:meth:`~@control.rate_limit` and :meth:`~@control.ping`. + +Sending the :control:`rate_limit` command and keyword arguments:: + + >>> app.control.broadcast('rate_limit', + ... arguments={'task_name': 'myapp.mytask', + ... 'rate_limit': '200/m'}) + +This will send the command asynchronously, without waiting for a reply. +To request a reply you have to use the `reply` argument:: + + >>> app.control.broadcast('rate_limit', { + ... 'task_name': 'myapp.mytask', 'rate_limit': '200/m'}, reply=True) + [{'worker1.example.com': 'New rate limit set successfully'}, + {'worker2.example.com': 'New rate limit set successfully'}, + {'worker3.example.com': 'New rate limit set successfully'}] + +Using the `destination` argument you can specify a list of workers +to receive the command:: + + >>> app.control.broadcast('rate_limit', { + ... 'task_name': 'myapp.mytask', + ... 'rate_limit': '200/m'}, reply=True, + ... destination=['worker1@example.com']) + [{'worker1.example.com': 'New rate limit set successfully'}] + + +Of course, using the higher-level interface to set rate limits is much +more convenient, but there are commands that can only be requested +using :meth:`~@control.broadcast`. + +Commands +======== + +.. control:: revoke + +``revoke``: Revoking tasks +-------------------------- +:pool support: all +:broker support: *amqp, redis* +:command: :program:`celery -A proj control revoke <task_id>` + +All worker nodes keep a memory of revoked task ids, either in-memory or +persistent on disk (see :ref:`worker-persistent-revokes`). + +When a worker receives a revoke request it will skip executing +the task, but it won't terminate an already executing task unless +the `terminate` option is set. + +.. note:: + + The terminate option is a last resort for administrators when + a task is stuck. It's not for terminating the task, + it's for terminating the process that is executing the task, and that + process may have already started processing another task at the point + when the signal is sent, so for this reason you must never call this + programmatically. + +If `terminate` is set the worker child process processing the task +will be terminated.
The default signal sent is `TERM`, but you can +specify this using the `signal` argument. Signal can be the uppercase name +of any signal defined in the :mod:`signal` module in the Python Standard +Library. + +Terminating a task also revokes it. + +**Example** + +:: + + >>> result.revoke() + + >>> AsyncResult(id).revoke() + + >>> app.control.revoke('d9078da5-9915-40a0-bfa1-392c7bde42ed') + + >>> app.control.revoke('d9078da5-9915-40a0-bfa1-392c7bde42ed', + ... terminate=True) + + >>> app.control.revoke('d9078da5-9915-40a0-bfa1-392c7bde42ed', + ... terminate=True, signal='SIGKILL') + + + + +Revoking multiple tasks +----------------------- + +.. versionadded:: 3.1 + + +The revoke method also accepts a list argument, where it will revoke +several tasks at once. + +**Example** + +:: + + >>> app.control.revoke([ + ... '7993b0aa-1f0b-4780-9af0-c47c0858b3f2', + ... 'f565793e-b041-4b2b-9ca4-dca22762a55d', + ... 'd9d35e03-2997-42d0-a13e-64a66b88a618', + ]) + + +The ``GroupResult.revoke`` method takes advantage of this since +version 3.1. + +.. _worker-persistent-revokes: + +Persistent revokes +------------------ + +Revoking tasks works by sending a broadcast message to all the workers, +the workers then keep a list of revoked tasks in memory. When a worker starts +up it will synchronize revoked tasks with other workers in the cluster. + +The list of revoked tasks is in-memory so if all workers restart the list +of revoked ids will also vanish. If you want to preserve this list between +restarts you need to specify a file for these to be stored in by using the `--statedb` +argument to :program:`celery worker`: + +.. code-block:: bash + + celery -A proj worker -l info --statedb=/var/run/celery/worker.state + +or if you use :program:`celery multi` you will want to create one file per +worker instance so then you can use the `%n` format to expand the current node +name: + +.. code-block:: bash + + celery multi start 2 -l info --statedb=/var/run/celery/%n.state + + +See also :ref:`worker-files` + +Note that remote control commands must be working for revokes to work. +Remote control commands are only supported by the RabbitMQ (amqp) and Redis +at this point. + +.. _worker-time-limits: + +Time Limits +=========== + +.. versionadded:: 2.0 + +pool support: *prefork/gevent* + +.. sidebar:: Soft, or hard? + + The time limit is set in two values, `soft` and `hard`. + The soft time limit allows the task to catch an exception + to clean up before it is killed: the hard timeout is not catchable + and force terminates the task. + +A single task can potentially run forever, if you have lots of tasks +waiting for some event that will never happen you will block the worker +from processing new tasks indefinitely. The best way to defend against +this scenario happening is enabling time limits. + +The time limit (`--time-limit`) is the maximum number of seconds a task +may run before the process executing it is terminated and replaced by a +new process. You can also enable a soft time limit (`--soft-time-limit`), +this raises an exception the task can catch to clean up before the hard +time limit kills it: + +.. code-block:: python + + from myapp import app + from celery.exceptions import SoftTimeLimitExceeded + + @app.task + def mytask(): + try: + do_work() + except SoftTimeLimitExceeded: + clean_up_in_a_hurry() + +Time limits can also be set using the :setting:`CELERYD_TASK_TIME_LIMIT` / +:setting:`CELERYD_TASK_SOFT_TIME_LIMIT` settings. + +.. 
note:: + + Time limits do not currently work on Windows and other + platforms that do not support the ``SIGUSR1`` signal. + + +Changing time limits at runtime +------------------------------- +.. versionadded:: 2.3 + +broker support: *amqp, redis* + +There is a remote control command that enables you to change both soft +and hard time limits for a task — named ``time_limit``. + +Example changing the time limit for the ``tasks.crawl_the_web`` task +to have a soft time limit of one minute, and a hard time limit of +two minutes:: + + >>> app.control.time_limit('tasks.crawl_the_web', + soft=60, hard=120, reply=True) + [{'worker1.example.com': {'ok': 'time limits set successfully'}}] + +Only tasks that starts executing after the time limit change will be affected. + +.. _worker-rate-limits: + +Rate Limits +=========== + +.. control:: rate_limit + +Changing rate-limits at runtime +------------------------------- + +Example changing the rate limit for the `myapp.mytask` task to execute +at most 200 tasks of that type every minute: + +.. code-block:: python + + >>> app.control.rate_limit('myapp.mytask', '200/m') + +The above does not specify a destination, so the change request will affect +all worker instances in the cluster. If you only want to affect a specific +list of workers you can include the ``destination`` argument: + +.. code-block:: python + + >>> app.control.rate_limit('myapp.mytask', '200/m', + ... destination=['celery@worker1.example.com']) + +.. warning:: + + This won't affect workers with the + :setting:`CELERY_DISABLE_RATE_LIMITS` setting enabled. + +.. _worker-maxtasksperchild: + +Max tasks per child setting +=========================== + +.. versionadded:: 2.0 + +pool support: *prefork* + +With this option you can configure the maximum number of tasks +a worker can execute before it's replaced by a new process. + +This is useful if you have memory leaks you have no control over +for example from closed source C extensions. + +The option can be set using the workers `--maxtasksperchild` argument +or using the :setting:`CELERYD_MAX_TASKS_PER_CHILD` setting. + +.. _worker-autoscaling: + +Autoscaling +=========== + +.. versionadded:: 2.2 + +pool support: *prefork*, *gevent* + +The *autoscaler* component is used to dynamically resize the pool +based on load: + +- The autoscaler adds more pool processes when there is work to do, + - and starts removing processes when the workload is low. + +It's enabled by the :option:`--autoscale` option, which needs two +numbers: the maximum and minimum number of pool processes:: + + --autoscale=AUTOSCALE + Enable autoscaling by providing + max_concurrency,min_concurrency. Example: + --autoscale=10,3 (always keep 3 processes, but grow to + 10 if necessary). + +You can also define your own rules for the autoscaler by subclassing +:class:`~celery.worker.autoscaler.Autoscaler`. +Some ideas for metrics include load average or the amount of memory available. +You can specify a custom autoscaler with the :setting:`CELERYD_AUTOSCALER` setting. + +.. _worker-queues: + +Queues +====== + +A worker instance can consume from any number of queues. +By default it will consume from all queues defined in the +:setting:`CELERY_QUEUES` setting (which if not specified defaults to the +queue named ``celery``). + +You can specify what queues to consume from at startup, +by giving a comma separated list of queues to the :option:`-Q` option: + +.. 
code-block:: bash + + $ celery -A proj worker -l info -Q foo,bar,baz + +If the queue name is defined in :setting:`CELERY_QUEUES` it will use that +configuration, but if it's not defined in the list of queues Celery will +automatically generate a new queue for you (depending on the +:setting:`CELERY_CREATE_MISSING_QUEUES` option). + +You can also tell the worker to start and stop consuming from a queue at +runtime using the remote control commands :control:`add_consumer` and +:control:`cancel_consumer`. + +.. control:: add_consumer + +Queues: Adding consumers +------------------------ + +The :control:`add_consumer` control command will tell one or more workers +to start consuming from a queue. This operation is idempotent. + +To tell all workers in the cluster to start consuming from a queue +named "``foo``" you can use the :program:`celery control` program: + +.. code-block:: bash + + $ celery -A proj control add_consumer foo + -> worker1.local: OK + started consuming from u'foo' + +If you want to specify a specific worker you can use the +:option:`--destination`` argument: + +.. code-block:: bash + + $ celery -A proj control add_consumer foo -d worker1.local + +The same can be accomplished dynamically using the :meth:`@control.add_consumer` method:: + + >>> app.control.add_consumer('foo', reply=True) + [{u'worker1.local': {u'ok': u"already consuming from u'foo'"}}] + + >>> app.control.add_consumer('foo', reply=True, + ... destination=['worker1@example.com']) + [{u'worker1.local': {u'ok': u"already consuming from u'foo'"}}] + + +By now I have only shown examples using automatic queues, +If you need more control you can also specify the exchange, routing_key and +even other options:: + + >>> app.control.add_consumer( + ... queue='baz', + ... exchange='ex', + ... exchange_type='topic', + ... routing_key='media.*', + ... options={ + ... 'queue_durable': False, + ... 'exchange_durable': False, + ... }, + ... reply=True, + ... destination=['w1@example.com', 'w2@example.com']) + + +.. control:: cancel_consumer + +Queues: Cancelling consumers +---------------------------- + +You can cancel a consumer by queue name using the :control:`cancel_consumer` +control command. + +To force all workers in the cluster to cancel consuming from a queue +you can use the :program:`celery control` program: + +.. code-block:: bash + + $ celery -A proj control cancel_consumer foo + +The :option:`--destination` argument can be used to specify a worker, or a +list of workers, to act on the command: + +.. code-block:: bash + + $ celery -A proj control cancel_consumer foo -d worker1.local + + +You can also cancel consumers programmatically using the +:meth:`@control.cancel_consumer` method: + +.. code-block:: bash + + >>> app.control.cancel_consumer('foo', reply=True) + [{u'worker1.local': {u'ok': u"no longer consuming from u'foo'"}}] + +.. control:: active_queues + +Queues: List of active queues +----------------------------- + +You can get a list of queues that a worker consumes from by using +the :control:`active_queues` control command: + +.. code-block:: bash + + $ celery -A proj inspect active_queues + [...] + +Like all other remote control commands this also supports the +:option:`--destination` argument used to specify which workers should +reply to the request: + +.. code-block:: bash + + $ celery -A proj inspect active_queues -d worker1.local + [...] + + +This can also be done programmatically by using the +:meth:`@control.inspect.active_queues` method:: + + >>> app.control.inspect().active_queues() + [...] 
+ + >>> app.control.inspect(['worker1.local']).active_queues() + [...] + +.. _worker-autoreloading: + +Autoreloading +============= + +.. versionadded:: 2.5 + +pool support: *prefork, eventlet, gevent, threads, solo* + +Starting :program:`celery worker` with the :option:`--autoreload` option will +enable the worker to watch for file system changes to all imported task +modules imported (and also any non-task modules added to the +:setting:`CELERY_IMPORTS` setting or the :option:`-I|--include` option). + +This is an experimental feature intended for use in development only, +using auto-reload in production is discouraged as the behavior of reloading +a module in Python is undefined, and may cause hard to diagnose bugs and +crashes. Celery uses the same approach as the auto-reloader found in e.g. +the Django ``runserver`` command. + +When auto-reload is enabled the worker starts an additional thread +that watches for changes in the file system. New modules are imported, +and already imported modules are reloaded whenever a change is detected, +and if the prefork pool is used the child processes will finish the work +they are doing and exit, so that they can be replaced by fresh processes +effectively reloading the code. + +File system notification backends are pluggable, and it comes with three +implementations: + +* inotify (Linux) + + Used if the :mod:`pyinotify` library is installed. + If you are running on Linux this is the recommended implementation, + to install the :mod:`pyinotify` library you have to run the following + command: + + .. code-block:: bash + + $ pip install pyinotify + +* kqueue (OS X/BSD) + +* stat + + The fallback implementation simply polls the files using ``stat`` and is very + expensive. + +You can force an implementation by setting the :envvar:`CELERYD_FSNOTIFY` +environment variable: + +.. code-block:: bash + + $ env CELERYD_FSNOTIFY=stat celery worker -l info --autoreload + +.. _worker-autoreload: + +.. control:: pool_restart + +Pool Restart Command +-------------------- + +.. versionadded:: 2.5 + +Requires the :setting:`CELERYD_POOL_RESTARTS` setting to be enabled. + +The remote control command :control:`pool_restart` sends restart requests to +the workers child processes. It is particularly useful for forcing +the worker to import new modules, or for reloading already imported +modules. This command does not interrupt executing tasks. + +Example +~~~~~~~ + +Running the following command will result in the `foo` and `bar` modules +being imported by the worker processes: + +.. code-block:: python + + >>> app.control.broadcast('pool_restart', + ... arguments={'modules': ['foo', 'bar']}) + +Use the ``reload`` argument to reload modules it has already imported: + +.. code-block:: python + + >>> app.control.broadcast('pool_restart', + ... arguments={'modules': ['foo'], + ... 'reload': True}) + +If you don't specify any modules then all known tasks modules will +be imported/reloaded: + +.. code-block:: python + + >>> app.control.broadcast('pool_restart', arguments={'reload': True}) + +The ``modules`` argument is a list of modules to modify. ``reload`` +specifies whether to reload modules if they have previously been imported. +By default ``reload`` is disabled. The `pool_restart` command uses the +Python :func:`reload` function to reload modules, or you can provide +your own custom reloader by passing the ``reloader`` argument. + +.. note:: + + Module reloading comes with caveats that are documented in :func:`reload`. 
+ Please read this documentation and make sure your modules are suitable + for reloading. + +.. seealso:: + + - http://pyunit.sourceforge.net/notes/reloading.html + - http://www.indelible.org/ink/python-reloading/ + - http://docs.python.org/library/functions.html#reload + + +.. _worker-inspect: + +Inspecting workers +================== + +:class:`@control.inspect` lets you inspect running workers. It +uses remote control commands under the hood. + +You can also use the ``celery`` command to inspect workers, +and it supports the same commands as the :class:`@Celery.control` interface. + +.. code-block:: python + + # Inspect all nodes. + >>> i = app.control.inspect() + + # Specify multiple nodes to inspect. + >>> i = app.control.inspect(['worker1.example.com', + 'worker2.example.com']) + + # Specify a single node to inspect. + >>> i = app.control.inspect('worker1.example.com') + +.. _worker-inspect-registered-tasks: + +Dump of registered tasks +------------------------ + +You can get a list of tasks registered in the worker using the +:meth:`~@control.inspect.registered`:: + + >>> i.registered() + [{'worker1.example.com': ['tasks.add', + 'tasks.sleeptask']}] + +.. _worker-inspect-active-tasks: + +Dump of currently executing tasks +--------------------------------- + +You can get a list of active tasks using +:meth:`~@control.inspect.active`:: + + >>> i.active() + [{'worker1.example.com': + [{'name': 'tasks.sleeptask', + 'id': '32666e9b-809c-41fa-8e93-5ae0c80afbbf', + 'args': '(8,)', + 'kwargs': '{}'}]}] + +.. _worker-inspect-eta-schedule: + +Dump of scheduled (ETA) tasks +----------------------------- + +You can get a list of tasks waiting to be scheduled by using +:meth:`~@control.inspect.scheduled`:: + + >>> i.scheduled() + [{'worker1.example.com': + [{'eta': '2010-06-07 09:07:52', 'priority': 0, + 'request': { + 'name': 'tasks.sleeptask', + 'id': '1a7980ea-8b19-413e-91d2-0b74f3844c4d', + 'args': '[1]', + 'kwargs': '{}'}}, + {'eta': '2010-06-07 09:07:53', 'priority': 0, + 'request': { + 'name': 'tasks.sleeptask', + 'id': '49661b9a-aa22-4120-94b7-9ee8031d219d', + 'args': '[2]', + 'kwargs': '{}'}}]}] + +.. note:: + + These are tasks with an eta/countdown argument, not periodic tasks. + +.. _worker-inspect-reserved: + +Dump of reserved tasks +---------------------- + +Reserved tasks are tasks that has been received, but is still waiting to be +executed. + +You can get a list of these using +:meth:`~@control.inspect.reserved`:: + + >>> i.reserved() + [{'worker1.example.com': + [{'name': 'tasks.sleeptask', + 'id': '32666e9b-809c-41fa-8e93-5ae0c80afbbf', + 'args': '(8,)', + 'kwargs': '{}'}]}] + + +.. _worker-statistics: + +Statistics +---------- + +The remote control command ``inspect stats`` (or +:meth:`~@control.inspect.stats`) will give you a long list of useful (or not +so useful) statistics about the worker: + +.. code-block:: bash + + $ celery -A proj inspect stats + +The output will include the following fields: + +- ``broker`` + + Section for broker information. + + * ``connect_timeout`` + + Timeout in seconds (int/float) for establishing a new connection. + + * ``heartbeat`` + + Current heartbeat value (set by client). + + * ``hostname`` + + Hostname of the remote broker. + + * ``insist`` + + No longer used. + + * ``login_method`` + + Login method used to connect to the broker. + + * ``port`` + + Port of the remote broker. + + * ``ssl`` + + SSL enabled/disabled. + + * ``transport`` + + Name of transport used (e.g. 
``amqp`` or ``redis``) + + * ``transport_options`` + + Options passed to transport. + + * ``uri_prefix`` + + Some transports expects the host name to be an URL, this applies to + for example SQLAlchemy where the host name part is the connection URI: + + redis+socket:///tmp/redis.sock + + In this example the uri prefix will be ``redis``. + + * ``userid`` + + User id used to connect to the broker with. + + * ``virtual_host`` + + Virtual host used. + +- ``clock`` + + Value of the workers logical clock. This is a positive integer and should + be increasing every time you receive statistics. + +- ``pid`` + + Process id of the worker instance (Main process). + +- ``pool`` + + Pool-specific section. + + * ``max-concurrency`` + + Max number of processes/threads/green threads. + + * ``max-tasks-per-child`` + + Max number of tasks a thread may execute before being recycled. + + * ``processes`` + + List of pids (or thread-id's). + + * ``put-guarded-by-semaphore`` + + Internal + + * ``timeouts`` + + Default values for time limits. + + * ``writes`` + + Specific to the prefork pool, this shows the distribution of writes + to each process in the pool when using async I/O. + +- ``prefetch_count`` + + Current prefetch count value for the task consumer. + +- ``rusage`` + + System usage statistics. The fields available may be different + on your platform. + + From :manpage:`getrusage(2)`: + + * ``stime`` + + Time spent in operating system code on behalf of this process. + + * ``utime`` + + Time spent executing user instructions. + + * ``maxrss`` + + The maximum resident size used by this process (in kilobytes). + + * ``idrss`` + + Amount of unshared memory used for data (in kilobytes times ticks of + execution) + + * ``isrss`` + + Amount of unshared memory used for stack space (in kilobytes times + ticks of execution) + + * ``ixrss`` + + Amount of memory shared with other processes (in kilobytes times + ticks of execution). + + * ``inblock`` + + Number of times the file system had to read from the disk on behalf of + this process. + + * ``oublock`` + + Number of times the file system has to write to disk on behalf of + this process. + + * ``majflt`` + + Number of page faults which were serviced by doing I/O. + + * ``minflt`` + + Number of page faults which were serviced without doing I/O. + + * ``msgrcv`` + + Number of IPC messages received. + + * ``msgsnd`` + + Number of IPC messages sent. + + * ``nvcsw`` + + Number of times this process voluntarily invoked a context switch. + + * ``nivcsw`` + + Number of times an involuntary context switch took place. + + * ``nsignals`` + + Number of signals received. + + * ``nswap`` + + The number of times this process was swapped entirely out of memory. + + +- ``total`` + + List of task names and a total number of times that task have been + executed since worker start. + + +Additional Commands +=================== + +.. control:: shutdown + +Remote shutdown +--------------- + +This command will gracefully shut down the worker remotely: + +.. code-block:: python + + >>> app.control.broadcast('shutdown') # shutdown all workers + >>> app.control.broadcast('shutdown, destination="worker1@example.com") + +.. control:: ping + +Ping +---- + +This command requests a ping from alive workers. +The workers reply with the string 'pong', and that's just about it. +It will use the default one second timeout for replies unless you specify +a custom timeout: + +.. 
code-block:: python + + >>> app.control.ping(timeout=0.5) + [{'worker1.example.com': 'pong'}, + {'worker2.example.com': 'pong'}, + {'worker3.example.com': 'pong'}] + +:meth:`~@control.ping` also supports the `destination` argument, +so you can specify which workers to ping:: + + >>> ping(['worker2.example.com', 'worker3.example.com']) + [{'worker2.example.com': 'pong'}, + {'worker3.example.com': 'pong'}] + +.. _worker-enable-events: + +.. control:: enable_events +.. control:: disable_events + +Enable/disable events +--------------------- + +You can enable/disable events by using the `enable_events`, +`disable_events` commands. This is useful to temporarily monitor +a worker using :program:`celery events`/:program:`celerymon`. + +.. code-block:: python + + >>> app.control.enable_events() + >>> app.control.disable_events() + +.. _worker-custom-control-commands: + +Writing your own remote control commands +======================================== + +Remote control commands are registered in the control panel and +they take a single argument: the current +:class:`~celery.worker.control.ControlDispatch` instance. +From there you have access to the active +:class:`~celery.worker.consumer.Consumer` if needed. + +Here's an example control command that increments the task prefetch count: + +.. code-block:: python + + from celery.worker.control import Panel + + @Panel.register + def increase_prefetch_count(state, n=1): + state.consumer.qos.increment_eventually(n) + return {'ok': 'prefetch count incremented'} diff --git a/docs/whatsnew-2.5.rst b/docs/whatsnew-2.5.rst new file mode 100644 index 0000000..08dc313 --- /dev/null +++ b/docs/whatsnew-2.5.rst @@ -0,0 +1,569 @@ +.. _whatsnew-2.5: + +========================== + What's new in Celery 2.5 +========================== + +Celery aims to be a flexible and reliable, best-of-breed solution +to process vast amounts of messages in a distributed fashion, while +providing operations with the tools to maintain such a system. + +Celery has a large and diverse community of users and contributors, +you should come join us :ref:`on IRC ` +or :ref:`our mailing-list `. + +To read more about Celery you should visit our `website`_. + +While this version is backward compatible with previous versions +it is important that you read the following section. + +If you use Celery in combination with Django you must also +read the `django-celery changelog ` and upgrade to `django-celery 2.5`_. + +This version is officially supported on CPython 2.5, 2.6, 2.7, 3.2 and 3.3, +as well as PyPy and Jython. + + +.. _`website`: http://celeryproject.org/ +.. _`django-celery 2.5`: http://pypi.python.org/pypi/django-celery/ + +.. contents:: + :local: + +.. _v250-important: + +Important Notes +=============== + +Broker connection pool now enabled by default +--------------------------------------------- + +The default limit is 10 connections, if you have many threads/green-threads +using connections at the same time you may want to tweak this limit +to avoid contention. + +See the :setting:`BROKER_POOL_LIMIT` setting for more information. + +Also note that publishing tasks will be retried by default, to change +this default or the default retry policy see +:setting:`CELERY_TASK_PUBLISH_RETRY` and +:setting:`CELERY_TASK_PUBLISH_RETRY_POLICY`. 
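+
+As a sketch, and with purely illustrative values, both the pool limit and the
+publish retry behavior can be adjusted in the configuration module:
+
+.. code-block:: python
+
+    # Keep at most 20 broker connections in the pool.
+    BROKER_POOL_LIMIT = 20
+
+    # Retry publishing task messages if the broker connection is lost.
+    CELERY_TASK_PUBLISH_RETRY = True
+    CELERY_TASK_PUBLISH_RETRY_POLICY = {
+        'max_retries': 3,       # give up after three attempts
+        'interval_start': 0,    # retry immediately the first time
+        'interval_step': 0.2,   # then wait 0.2 seconds more for each retry
+        'interval_max': 0.5,    # but never wait more than 0.5 seconds
+    }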
+ +Rabbit Result Backend: Exchange is no longer *auto delete* +---------------------------------------------------------- + +The exchange used for results in the Rabbit (AMQP) result backend +used to have the *auto_delete* flag set, which could result in a +race condition leading to an annoying warning. + +.. admonition:: For RabbitMQ users + + Old exchanges created with the *auto_delete* flag enabled has + to be removed. + + The :program:`camqadm` command can be used to delete the + previous exchange: + + .. code-block:: bash + + $ camqadm exchange.delete celeryresults + + As an alternative to deleting the old exchange you can + configure a new name for the exchange:: + + CELERY_RESULT_EXCHANGE = 'celeryresults2' + + But you have to make sure that all clients and workers + use this new setting, so they are updated to use the same + exchange name. + +Solution for hanging workers (but must be manually enabled) +----------------------------------------------------------- + +The :setting:`CELERYD_FORCE_EXECV` setting has been added to solve +a problem with deadlocks that originate when threads and fork is mixed +together: + +.. code-block:: python + + CELERYD_FORCE_EXECV = True + +This setting is recommended for all users using the prefork pool, +but especially users also using time limits or a max tasks per child +setting. + +- See `Python Issue 6721`_ to read more about this issue, and why + resorting to :func:`~os.execv`` is the only safe solution. + +Enabling this option will result in a slight performance penalty +when new child worker processes are started, and it will also increase +memory usage (but many platforms are optimized, so the impact may be +minimal). Considering that it ensures reliability when replacing +lost worker processes, it should be worth it. + +- It's already the default behavior on Windows. +- It will be the default behavior for all platforms in a future version. + +.. _`Python Issue 6721`: http://bugs.python.org/issue6721#msg140215 + +.. _v250-optimizations: + +Optimizations +============= + +- The code path used when the worker executes a task has been heavily + optimized, meaning the worker is able to process a great deal + more tasks/second compared to previous versions. As an example the solo + pool can now process up to 15000 tasks/second on a 4 core MacBook Pro + when using the `pylibrabbitmq`_ transport, where it previously + could only do 5000 tasks/second. + +- The task error tracebacks are now much shorter. + +- Fixed a noticeable delay in task processing when rate limits are enabled. + +.. _`pylibrabbitmq`: http://pypi.python.org/pylibrabbitmq/ + +.. _v250-deprecations: + +Deprecations +============ + +Removals +-------- + +* The old :class:`TaskSet` signature of ``(task_name, list_of_tasks)`` + can no longer be used (originally scheduled for removal in 2.4). + The deprecated ``.task_name`` and ``.task`` attributes has also been + removed. + +* The functions ``celery.execute.delay_task``, ``celery.execute.apply``, + and ``celery.execute.apply_async`` has been removed (originally) + scheduled for removal in 2.3). + +* The built-in ``ping`` task has been removed (originally scheduled + for removal in 2.3). Please use the ping broadcast command + instead. + +* It is no longer possible to import ``subtask`` and ``TaskSet`` + from :mod:`celery.task.base`, please import them from :mod:`celery.task` + instead (originally scheduled for removal in 2.4). 
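+
+For example, code that used the old import location only needs to import the
+same names from :mod:`celery.task` (a minimal sketch):
+
+.. code-block:: python
+
+    # Old import location, no longer available:
+    # from celery.task.base import subtask, TaskSet
+
+    # New import location:
+    from celery.task import subtask, TaskSet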
+ +Deprecations +------------ + +* The :mod:`celery.decorators` module has changed status + from pending deprecation to deprecated, and is scheduled for removal + in version 4.0. The ``celery.task`` module must be used instead. + +.. _v250-news: + +News +==== + +Timezone support +---------------- + +Celery can now be configured to treat all incoming and outgoing dates +as UTC, and the local timezone can be configured. + +This is not yet enabled by default, since enabling +time zone support means workers running versions pre 2.5 +will be out of sync with upgraded workers. + +To enable UTC you have to set :setting:`CELERY_ENABLE_UTC`:: + + CELERY_ENABLE_UTC = True + +When UTC is enabled, dates and times in task messages will be +converted to UTC, and then converted back to the local timezone +when received by a worker. + +You can change the local timezone using the :setting:`CELERY_TIMEZONE` +setting. Installing the :mod:`pytz` library is recommended when +using a custom timezone, to keep timezone definition up-to-date, +but it will fallback to a system definition of the timezone if available. + +UTC will enabled by default in version 3.0. + +.. note:: + + django-celery will use the local timezone as specified by the + ``TIME_ZONE`` setting, it will also honor the new `USE_TZ`_ setting + introuced in Django 1.4. + +.. _`USE_TZ`: https://docs.djangoproject.com/en/dev/topics/i18n/timezones/ + +New security serializer using cryptographic signing +--------------------------------------------------- + +A new serializer has been added that signs and verifies the signature +of messages. + +The name of the new serializer is ``auth``, and needs additional +configuration to work (see :ref:`conf-security`). + +.. seealso:: + + :ref:`guide-security` + +Contributed by Mher Movsisyan. + +Experimental support for automatic module reloading +--------------------------------------------------- + +Starting :program:`celeryd` with the :option:`--autoreload` option will +enable the worker to watch for file system changes to all imported task +modules imported (and also any non-task modules added to the +:setting:`CELERY_IMPORTS` setting or the :option:`-I|--include` option). + +This is an experimental feature intended for use in development only, +using auto-reload in production is discouraged as the behavior of reloading +a module in Python is undefined, and may cause hard to diagnose bugs and +crashes. Celery uses the same approach as the auto-reloader found in e.g. +the Django ``runserver`` command. + +When auto-reload is enabled the worker starts an additional thread +that watches for changes in the file system. New modules are imported, +and already imported modules are reloaded whenever a change is detected, +and if the prefork pool is used the child processes will finish the work +they are doing and exit, so that they can be replaced by fresh processes +effectively reloading the code. + +File system notification backends are pluggable, and Celery comes with three +implementations: + +* inotify (Linux) + + Used if the :mod:`pyinotify` library is installed. + If you are running on Linux this is the recommended implementation, + to install the :mod:`pyinotify` library you have to run the following + command: + + .. code-block:: bash + + $ pip install pyinotify + +* kqueue (OS X/BSD) + +* stat + + The fallback implementation simply polls the files using ``stat`` and is very + expensive. + +You can force an implementation by setting the :envvar:`CELERYD_FSNOTIFY` +environment variable: + +.. 
code-block:: bash + + $ env CELERYD_FSNOTIFY=stat celeryd -l info --autoreload + +Contributed by Mher Movsisyan. + + +New :setting:`CELERY_ANNOTATIONS` setting +----------------------------------------- + +This new setting enables the configuration to modify task classes +and their attributes. + +The setting can be a dict, or a list of annotation objects that filter +for tasks and return a map of attributes to change. + +As an example, this is an annotation to change the ``rate_limit`` attribute +for the ``tasks.add`` task: + +.. code-block:: python + + CELERY_ANNOTATIONS = {'tasks.add': {'rate_limit': '10/s'}} + +or change the same for all tasks: + +.. code-block:: python + + CELERY_ANNOTATIONS = {'*': {'rate_limit': '10/s'}} + +You can change methods too, for example the ``on_failure`` handler: + +.. code-block:: python + + def my_on_failure(self, exc, task_id, args, kwargs, einfo): + print('Oh no! Task failed: %r' % (exc, )) + + CELERY_ANNOTATIONS = {'*': {'on_failure': my_on_failure}} + +If you need more flexibility then you can also create objects +that filter for tasks to annotate: + +.. code-block:: python + + class MyAnnotate(object): + + def annotate(self, task): + if task.name.startswith('tasks.'): + return {'rate_limit': '10/s'} + + CELERY_ANNOTATIONS = (MyAnnotate(), {…}) + +``current`` provides the currently executing task +------------------------------------------------- + +The new :data:`celery.task.current` proxy will always give the currently +executing task. + +**Example**: + +.. code-block:: python + + from celery.task import current, task + + @task + def update_twitter_status(auth, message): + twitter = Twitter(auth) + try: + twitter.update_status(message) + except twitter.FailWhale, exc: + # retry in 10 seconds. + current.retry(countdown=10, exc=exc) + +Previously you would have to type ``update_twitter_status.retry(…)`` +here, which can be annoying for long task names. + +.. note:: + This will not work if the task function is called directly, i.e: + ``update_twitter_status(a, b)``. For that to work ``apply`` must + be used: ``update_twitter_status.apply((a, b))``. + +In Other News +------------- + +- Now depends on Kombu 2.1.0. + +- Efficient Chord support for the memcached backend (Issue #533) + + This means memcached joins Redis in the ability to do non-polling + chords. + + Contributed by Dan McGee. + +- Adds Chord support for the Rabbit result backend (amqp) + + The Rabbit result backend can now use the fallback chord solution. + +- Sending :sig:`QUIT` to celeryd will now cause it cold terminate. + + That is, it will not finish executing the tasks it is currently + working on. + + Contributed by Alec Clowes. + +- New "detailed" mode for the Cassandra backend. + + Allows to have a "detailed" mode for the Cassandra backend. + Basically the idea is to keep all states using Cassandra wide columns. + New states are then appended to the row as new columns, the last state + being the last column. + + See the :setting:`CASSANDRA_DETAILED_MODE` setting. + + Contributed by Steeve Morin. + +- The crontab parser now matches Vixie Cron behavior when parsing ranges + with steps (e.g. 1-59/2). + + Contributed by Daniel Hepper. + +- celerybeat can now be configured on the command-line like celeryd. + + Additional configuration must be added at the end of the argument list + followed by ``--``, for example: + + .. 
code-block:: bash + + $ celerybeat -l info -- celerybeat.max_loop_interval=10.0 + +- Now limits the number of frames in a traceback so that celeryd does not + crash on maximum recursion limit exceeded exceptions (Issue #615). + + The limit is set to the current recursion limit divided by 8 (which + is 125 by default). + + To get or set the current recursion limit use + :func:`sys.getrecursionlimit` and :func:`sys.setrecursionlimit`. + +- More information is now preserved in the pickleable traceback. + + This has been added so that Sentry can show more details. + + Contributed by Sean O'Connor. + +- CentOS init script has been updated and should be more flexible. + + Contributed by Andrew McFague. + +- MongoDB result backend now supports ``forget()``. + + Contributed by Andrew McFague + +- ``task.retry()`` now re-raises the original exception keeping + the original stack trace. + + Suggested by ojii. + +- The `--uid` argument to daemons now uses ``initgroups()`` to set + groups to all the groups the user is a member of. + + Contributed by Łukasz Oleś. + +- celeryctl: Added ``shell`` command. + + The shell will have the current_app (``celery``) and all tasks + automatically added to locals. + +- celeryctl: Added ``migrate`` command. + + The migrate command moves all tasks from one broker to another. + Note that this is experimental and you should have a backup + of the data before proceeding. + + **Examples**: + + .. code-block:: bash + + $ celeryctl migrate redis://localhost amqp://localhost + $ celeryctl migrate amqp://localhost//v1 amqp://localhost//v2 + $ python manage.py celeryctl migrate django:// redis:// + +* Routers can now override the ``exchange`` and ``routing_key`` used + to create missing queues (Issue #577). + + By default this will always use the name of the queue, + but you can now have a router return exchange and routing_key keys + to set them. + + This is useful when using routing classes which decides a destination + at runtime. + + Contributed by Akira Matsuzaki. + +- Redis result backend: Adds support for a ``max_connections`` parameter. + + It is now possible to configure the maximum number of + simultaneous connections in the Redis connection pool used for + results. + + The default max connections setting can be configured using the + :setting:`CELERY_REDIS_MAX_CONNECTIONS` setting, + or it can be changed individually by ``RedisBackend(max_connections=int)``. + + Contributed by Steeve Morin. + +- Redis result backend: Adds the ability to wait for results without polling. + + Contributed by Steeve Morin. + +- MongoDB result backend: Now supports save and restore taskset. + + Contributed by Julien Poissonnier. + +- There's a new :ref:`guide-security` guide in the documentation. + +- The init scripts has been updated, and many bugs fixed. + + Contributed by Chris Streeter. + +- User (tilde) is now expanded in command-line arguments. + +- Can now configure CELERYCTL envvar in :file:`/etc/default/celeryd`. + + While not necessary for operation, :program:`celeryctl` is used for the + ``celeryd status`` command, and the path to :program:`celeryctl` must be + configured for that to work. + + The daemonization cookbook contains examples. + + Contributed by Jude Nagurney. + +- The MongoDB result backend can now use Replica Sets. + + Contributed by Ivan Metzlar. + +- gevent: Now supports autoscaling (Issue #599). + + Contributed by Mark Lavin. 
+ +- multiprocessing: Mediator thread is now always enabled, + even though rate limits are disabled, as the pool semaphore + is known to block the main thread, causing broadcast commands and + shutdown to depend on the semaphore being released. + +Fixes +===== + +- Exceptions that are re-raised with a new exception object now keeps + the original stack trace. + +- Windows: Fixed the ``no handlers found for multiprocessing`` warning. + +- Windows: The ``celeryd`` program can now be used. + + Previously Windows users had to launch celeryd using + ``python -m celery.bin.celeryd``. + +- Redis result backend: Now uses ``SETEX`` command to set result key, + and expiry atomically. + + Suggested by yaniv-aknin. + +- celeryd: Fixed a problem where shutdown hanged when Ctrl+C was used to + terminate. + +- celeryd: No longer crashes when channel errors occur. + + Fix contributed by Roger Hu. + +- Fixed memory leak in the eventlet pool, caused by the + use of ``greenlet.getcurrent``. + + Fix contributed by Ignas Mikalajūnas. + + +- Cassandra backend: No longer uses :func:`pycassa.connect` which is + deprecated since :mod:`pycassa` 1.4. + + Fix contributed by Jeff Terrace. + +- Fixed unicode decode errors that could occur while sending error emails. + + Fix contributed by Seong Wun Mun. + +- ``celery.bin`` programs now always defines ``__package__`` as recommended + by PEP-366. + +- ``send_task`` now emits a warning when used in combination with + :setting:`CELERY_ALWAYS_EAGER` (Issue #581). + + Contributed by Mher Movsisyan. + +- ``apply_async`` now forwards the original keyword arguments to ``apply`` + when :setting:`CELERY_ALWAYS_EAGER` is enabled. + +- celeryev now tries to re-establish the connection if the connection + to the broker is lost (Issue #574). + +- celeryev: Fixed a crash occurring if a task has no associated worker + information. + + Fix contributed by Matt Williamson. + +- The current date and time is now consistently taken from the current loaders + ``now`` method. + +- Now shows helpful error message when given a config module ending in + ``.py`` that can't be imported. + +- celeryctl: The ``--expires`` and ``-eta`` arguments to the apply command + can now be an ISO-8601 formatted string. + +- celeryctl now exits with exit status ``EX_UNAVAILABLE`` (69) if no replies + have been received. diff --git a/docs/whatsnew-3.0.rst b/docs/whatsnew-3.0.rst new file mode 100644 index 0000000..8c2f831 --- /dev/null +++ b/docs/whatsnew-3.0.rst @@ -0,0 +1,998 @@ +.. _whatsnew-3.0: + +=========================================== + What's new in Celery 3.0 (Chiastic Slide) +=========================================== + +Celery is a simple, flexible and reliable distributed system to +process vast amounts of messages, while providing operations with +the tools required to maintain such a system. + +It's a task queue with focus on real-time processing, while also +supporting task scheduling. + +Celery has a large and diverse community of users and contributors, +you should come join us :ref:`on IRC ` +or :ref:`our mailing-list `. + +To read more about Celery you should go read the :ref:`introduction `. + +While this version is backward compatible with previous versions +it's important that you read the following section. + +If you use Celery in combination with Django you must also +read the `django-celery changelog`_ and upgrade to `django-celery 3.0`_. + +This version is officially supported on CPython 2.5, 2.6, 2.7, 3.2 and 3.3, +as well as PyPy and Jython. + +Highlights +========== + +.. 
topic:: Overview + + - A new and improved API, that is both simpler and more powerful. + + Everyone must read the new :ref:`first-steps` tutorial, + and the new :ref:`next-steps` tutorial. Oh, and + why not reread the user guide while you're at it :) + + There are no current plans to deprecate the old API, + so you don't have to be in a hurry to port your applications. + + - The worker is now thread-less, giving great performance improvements. + + - The new "Canvas" makes it easy to define complex workflows. + + Ever wanted to chain tasks together? This is possible, but + not just that, now you can even chain together groups and chords, + or even combine multiple chains. + + Read more in the :ref:`Canvas ` user guide. + + - All of Celery's command-line programs are now available from a single + :program:`celery` umbrella command. + + - This is the last version to support Python 2.5. + + Starting with Celery 3.1, Python 2.6 or later is required. + + - Support for the new librabbitmq C client. + + Celery will automatically use the :mod:`librabbitmq` module + if installed, which is a very fast and memory-optimized + replacement for the py-amqp module. + + - Redis support is more reliable with improved ack emulation. + + - Celery now always uses UTC + + - Over 600 commits, 30k additions/36k deletions. + + In comparison 1.0➝ 2.0 had 18k additions/8k deletions. + + +.. _`website`: http://celeryproject.org/ +.. _`django-celery changelog`: + http://github.com/celery/django-celery/tree/master/Changelog +.. _`django-celery 3.0`: http://pypi.python.org/pypi/django-celery/ + +.. contents:: + :local: + :depth: 2 + +.. _v300-important: + +Important Notes +=============== + +Broadcast exchanges renamed +--------------------------- + +The workers remote control command exchanges has been renamed +(a new pidbox name), this is because the ``auto_delete`` flag on the exchanges +has been removed, and that makes it incompatible with earlier versions. + +You can manually delete the old exchanges if you want, +using the :program:`celery amqp` command (previously called ``camqadm``): + +.. code-block:: bash + + $ celery amqp exchange.delete celeryd.pidbox + $ celery amqp exchange.delete reply.celeryd.pidbox + +Eventloop +--------- + +The worker is now running *without threads* when used with RabbitMQ (AMQP), +or Redis as a broker, resulting in: + +- Much better overall performance. +- Fixes several edge case race conditions. +- Sub-millisecond timer precision. +- Faster shutdown times. + +The transports supported are: ``py-amqp`` ``librabbitmq``, ``redis``, +and ``amqplib``. +Hopefully this can be extended to include additional broker transports +in the future. + +For increased reliability the :setting:`CELERY_FORCE_EXECV` setting is enabled +by default if the eventloop is not used. + +New ``celery`` umbrella command +------------------------------- + +All Celery's command-line programs are now available from a single +:program:`celery` umbrella command. + +You can see a list of subcommands and options by running: + +.. code-block:: bash + + $ celery help + +Commands include: + +- ``celery worker`` (previously ``celeryd``). + +- ``celery beat`` (previously ``celerybeat``). + +- ``celery amqp`` (previously ``camqadm``). + +The old programs are still available (``celeryd``, ``celerybeat``, etc), +but you are discouraged from using them. + +Now depends on :mod:`billiard`. 
+------------------------------- + +Billiard is a fork of the multiprocessing containing +the no-execv patch by sbt (http://bugs.python.org/issue8713), +and also contains the pool improvements previously located in Celery. + +This fork was necessary as changes to the C extension code was required +for the no-execv patch to work. + +- Issue #625 +- Issue #627 +- Issue #640 +- `django-celery #122 >> from celery import chain + + # (2 + 2) * 8 / 2 + >>> res = chain(add.subtask((2, 2)), + mul.subtask((8, )), + div.subtask((2,))).apply_async() + >>> res.get() == 16 + + >>> res.parent.get() == 32 + + >>> res.parent.parent.get() == 4 + +- Adds :meth:`AsyncResult.get_leaf` + + Waits and returns the result of the leaf subtask. + That is the last node found when traversing the graph, + but this means that the graph can be 1-dimensional only (in effect + a list). + +- Adds ``subtask.link(subtask)`` + ``subtask.link_error(subtask)`` + + Shortcut to ``s.options.setdefault('link', []).append(subtask)`` + +- Adds ``subtask.flatten_links()`` + + Returns a flattened list of all dependencies (recursively) + +Redis: Priority support. +------------------------ + +The message's ``priority`` field is now respected by the Redis +transport by having multiple lists for each named queue. +The queues are then consumed by in order of priority. + +The priority field is a number in the range of 0 - 9, where +0 is the default and highest priority. + +The priority range is collapsed into four steps by default, since it is +unlikely that nine steps will yield more benefit than using four steps. +The number of steps can be configured by setting the ``priority_steps`` +transport option, which must be a list of numbers in **sorted order**:: + + >>> BROKER_TRANSPORT_OPTIONS = { + ... 'priority_steps': [0, 2, 4, 6, 8, 9], + ... } + +Priorities implemented in this way is not as reliable as +priorities on the server side, which is why +the feature is nicknamed "quasi-priorities"; +**Using routing is still the suggested way of ensuring +quality of service**, as client implemented priorities +fall short in a number of ways, e.g. if the worker +is busy with long running tasks, has prefetched many messages, +or the queues are congested. + +Still, it is possible that using priorities in combination +with routing can be more beneficial than using routing +or priorities alone. Experimentation and monitoring +should be used to prove this. + +Contributed by Germán M. Bravo. + +Redis: Now cycles queues so that consuming is fair. +--------------------------------------------------- + +This ensures that a very busy queue won't block messages +from other queues, and ensures that all queues have +an equal chance of being consumed from. + +This used to be the case before, but the behavior was +accidentally changed while switching to using blocking pop. + + +`group`/`chord`/`chain` are now subtasks +---------------------------------------- + +- group is no longer an alias to TaskSet, but new alltogether, + since it was very difficult to migrate the TaskSet class to become + a subtask. 
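+
+  As a sketch (assuming an ``add`` task and a configured result backend),
+  what used to be built as a ``TaskSet`` is now written and applied as a
+  group::
+
+      >>> from celery import group
+
+      >>> group(add.s(2, 2), add.s(4, 4))().get()
+      [4, 8]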
+ +- A new shortcut has been added to tasks: + + :: + + >>> task.s(arg1, arg2, kw=1) + + as a shortcut to:: + + >>> task.subtask((arg1, arg2), {'kw': 1}) + +- Tasks can be chained by using the ``|`` operator:: + + >>> (add.s(2, 2), pow.s(2)).apply_async() + +- Subtasks can be "evaluated" using the ``~`` operator: + + :: + + >>> ~add.s(2, 2) + 4 + + >>> ~(add.s(2, 2) | pow.s(2)) + + is the same as:: + + >>> chain(add.s(2, 2), pow.s(2)).apply_async().get() + +- A new subtask_type key has been added to the subtask dicts + + This can be the string "chord", "group", "chain", "chunks", + "xmap", or "xstarmap". + +- maybe_subtask now uses subtask_type to reconstruct + the object, to be used when using non-pickle serializers. + +- The logic for these operations have been moved to dedicated + tasks celery.chord, celery.chain and celery.group. + +- subtask no longer inherits from AttributeDict. + + It's now a pure dict subclass with properties for attribute + access to the relevant keys. + +- The repr's now outputs how the sequence would like imperatively:: + + >>> from celery import chord + + >>> (chord([add.s(i, i) for i in xrange(10)], xsum.s()) + | pow.s(2)) + tasks.xsum([tasks.add(0, 0), + tasks.add(1, 1), + tasks.add(2, 2), + tasks.add(3, 3), + tasks.add(4, 4), + tasks.add(5, 5), + tasks.add(6, 6), + tasks.add(7, 7), + tasks.add(8, 8), + tasks.add(9, 9)]) | tasks.pow(2) + +New remote control commands +--------------------------- + +These commands were previously experimental, but they have proven +stable and is now documented as part of the offical API. + +- :control:`add_consumer`/:control:`cancel_consumer` + + Tells workers to consume from a new queue, or cancel consuming from a + queue. This command has also been changed so that the worker remembers + the queues added, so that the change will persist even if + the connection is re-connected. + + These commands are available programmatically as + :meth:`@control.add_consumer` / :meth:`@control.cancel_consumer`: + + .. code-block:: python + + >>> celery.control.add_consumer(queue_name, + ... destination=['w1.example.com']) + >>> celery.control.cancel_consumer(queue_name, + ... destination=['w1.example.com']) + + or using the :program:`celery control` command: + + .. code-block:: bash + + $ celery control -d w1.example.com add_consumer queue + $ celery control -d w1.example.com cancel_consumer queue + + .. note:: + + Remember that a control command without *destination* will be + sent to **all workers**. + +- :control:`autoscale` + + Tells workers with `--autoscale` enabled to change autoscale + max/min concurrency settings. + + This command is available programmatically as :meth:`@control.autoscale`: + + .. code-block:: python + + >>> celery.control.autoscale(max=10, min=5, + ... destination=['w1.example.com']) + + or using the :program:`celery control` command: + + .. code-block:: bash + + $ celery control -d w1.example.com autoscale 10 5 + +- :control:`pool_grow`/:control:`pool_shrink` + + Tells workers to add or remove pool processes. + + These commands are available programmatically as + :meth:`@control.pool_grow` / :meth:`@control.pool_shrink`: + + .. code-block:: python + + >>> celery.control.pool_grow(2, destination=['w1.example.com']) + >>> celery.contorl.pool_shrink(2, destination=['w1.example.com']) + + or using the :program:`celery control` command: + + .. 
code-block:: bash + + $ celery control -d w1.example.com pool_grow 2 + $ celery control -d w1.example.com pool_shrink 2 + +- :program:`celery control` now supports :control:`rate_limit` and + :control:`time_limit` commands. + + See ``celery control --help`` for details. + +Crontab now supports Day of Month, and Month of Year arguments +-------------------------------------------------------------- + +See the updated list of examples at :ref:`beat-crontab`. + +Immutable subtasks +------------------ + +``subtask``'s can now be immutable, which means that the arguments +will not be modified when calling callbacks:: + + >>> chain(add.s(2, 2), clear_static_electricity.si()) + +means it will not receive the argument of the parent task, +and ``.si()`` is a shortcut to:: + + >>> clear_static_electricity.subtask(immutable=True) + +Logging Improvements +-------------------- + +Logging support now conforms better with best practices. + +- Classes used by the worker no longer uses app.get_default_logger, but uses + `celery.utils.log.get_logger` which simply gets the logger not setting the + level, and adds a NullHandler. + +- Loggers are no longer passed around, instead every module using logging + defines a module global logger that is used throughout. + +- All loggers inherit from a common logger called "celery". + +- Before task.get_logger would setup a new logger for every task, + and even set the loglevel. This is no longer the case. + + - Instead all task loggers now inherit from a common "celery.task" logger + that is set up when programs call `setup_logging_subsystem`. + + - Instead of using LoggerAdapter to augment the formatter with + the task_id and task_name field, the task base logger now use + a special formatter adding these values at runtime from the + currently executing task. + +- In fact, ``task.get_logger`` is no longer recommended, it is better + to add a module-level logger to your tasks module. + + For example, like this: + + .. code-block:: python + + from celery.utils.log import get_task_logger + + logger = get_task_logger(__name__) + + @celery.task + def add(x, y): + logger.debug('Adding %r + %r' % (x, y)) + return x + y + + The resulting logger will then inherit from the ``"celery.task"`` logger + so that the current task name and id is included in logging output. + +- Redirected output from stdout/stderr is now logged to a "celery.redirected" + logger. + +- In addition a few warnings.warn have been replaced with logger.warn. + +- Now avoids the 'no handlers for logger multiprocessing' warning + +Task registry no longer global +------------------------------ + +Every Celery instance now has its own task registry. + +You can make apps share registries by specifying it:: + + >>> app1 = Celery() + >>> app2 = Celery(tasks=app1.tasks) + +Note that tasks are shared between registries by default, so that +tasks will be added to every subsequently created task registry. +As an alternative tasks can be private to specific task registries +by setting the ``shared`` argument to the ``@task`` decorator:: + + @celery.task(shared=False) + def add(x, y): + return x + y + + +Abstract tasks are now lazily bound. +------------------------------------ + +The :class:`~celery.task.Task` class is no longer bound to an app +by default, it will first be bound (and configured) when +a concrete subclass is created. 
+ +This means that you can safely import and make task base classes, +without also initializing the app environment:: + + from celery.task import Task + + class DebugTask(Task): + abstract = True + + def __call__(self, *args, **kwargs): + print('CALLING %r' % (self, )) + return self.run(*args, **kwargs) + + >>> DebugTask + + + >>> @celery1.task(base=DebugTask) + ... def add(x, y): + ... return x + y + >>> add.__class__ + > + + +Lazy task decorators +-------------------- + +The ``@task`` decorator is now lazy when used with custom apps. + +That is, if ``accept_magic_kwargs`` is enabled (herby called "compat mode"), the task +decorator executes inline like before, however for custom apps the @task +decorator now returns a special PromiseProxy object that is only evaluated +on access. + +All promises will be evaluated when `app.finalize` is called, or implicitly +when the task registry is first used. + + +Smart `--app` option +-------------------- + +The :option:`--app` option now 'auto-detects' + + - If the provided path is a module it tries to get an + attribute named 'celery'. + + - If the provided path is a package it tries + to import a submodule named 'celery', + and get the celery attribute from that module. + +E.g. if you have a project named 'proj' where the +celery app is located in 'from proj.celery import app', +then the following will be equivalent: + +.. code-block:: bash + + $ celery worker --app=proj + $ celery worker --app=proj.celery: + $ celery worker --app=proj.celery:app + +In Other News +------------- + +- New :setting:`CELERYD_WORKER_LOST_WAIT` to control the timeout in + seconds before :exc:`billiard.WorkerLostError` is raised + when a worker can not be signalled (Issue #595). + + Contributed by Brendon Crawford. + +- Redis event monitor queues are now automatically deleted (Issue #436). + +- App instance factory methods have been converted to be cached + descriptors that creates a new subclass on access. + + This means that e.g. ``app.Worker`` is an actual class + and will work as expected when:: + + class Worker(app.Worker): + ... + +- New signal: :signal:`task_success`. + +- Multiprocessing logs are now only emitted if the :envvar:`MP_LOG` + environment variable is set. + +- The Celery instance can now be created with a broker URL + + .. code-block:: python + + app = Celery(broker='redis://') + +- Result backends can now be set using an URL + + Currently only supported by redis. Example use:: + + CELERY_RESULT_BACKEND = 'redis://localhost/1' + +- Heartbeat frequency now every 5s, and frequency sent with event + + The heartbeat frequency is now available in the worker event messages, + so that clients can decide when to consider workers offline based on + this value. + +- Module celery.actors has been removed, and will be part of cl instead. + +- Introduces new ``celery`` command, which is an entrypoint for all other + commands. + + The main for this command can be run by calling ``celery.start()``. + +- Annotations now supports decorators if the key startswith '@'. + + E.g.: + + .. code-block:: python + + def debug_args(fun): + + @wraps(fun) + def _inner(*args, **kwargs): + print('ARGS: %r' % (args, )) + return _inner + + CELERY_ANNOTATIONS = { + 'tasks.add': {'@__call__': debug_args}, + } + + Also tasks are now always bound by class so that + annotated methods end up being bound. 

- Bug report now available as a command and broadcast command

  - Get it from a Python repl::

      >>> import celery
      >>> print(celery.bugreport())

  - Using the ``celery`` command line program:

    .. code-block:: bash

        $ celery report

  - Get it from remote workers:

    .. code-block:: bash

        $ celery inspect report

- Module ``celery.log`` moved to :mod:`celery.app.log`.

- Module ``celery.task.control`` moved to :mod:`celery.app.control`.

- New signal: :signal:`task_revoked`

  Sent in the main process when the task is revoked or terminated.

- ``AsyncResult.task_id`` renamed to ``AsyncResult.id``

- ``TasksetResult.taskset_id`` renamed to ``.id``

- ``xmap(task, sequence)`` and ``xstarmap(task, sequence)``

  Returns a list of the results of applying the task function to every
  item in the sequence.

  Example::

      >>> from celery import xstarmap

      >>> xstarmap(add, zip(range(10), range(10))).apply_async().get()
      [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

- ``chunks(task, sequence, chunksize)`` (a usage sketch follows further below).

- ``group.skew(start=, stop=, step=)``

  Skew staggers the countdown for the individual tasks in a group,
  e.g. with this group::

      >>> g = group(add.s(i, i) for i in xrange(10))

  Skewing the tasks from 0 seconds to 10 seconds::

      >>> g.skew(stop=10)

  The first task will then execute after 0 seconds, the second after
  1 second, the third after 2 seconds, and so on.

- 99% test coverage

- :setting:`CELERY_QUEUES` can now be a list/tuple of :class:`~kombu.Queue`
  instances.

  Internally :attr:`@amqp.queues` is now a mapping of name/Queue instances,
  instead of converting on the fly.

- Can now specify connection for :class:`@control.inspect`.

  .. code-block:: python

      from kombu import Connection

      i = celery.control.inspect(connection=Connection('redis://'))
      i.active_queues()

- :setting:`CELERY_FORCE_EXECV` is now enabled by default.

  If the old behavior is wanted the setting can be set to False,
  or the new :option:`--no-execv` option to :program:`celery worker`
  can be used.

- Deprecated module ``celery.conf`` has been removed.

- The :setting:`CELERY_TIMEZONE` setting now always requires the :mod:`pytz`
  library to be installed (except if the timezone is set to `UTC`).

- The Tokyo Tyrant backend has been removed and is no longer supported.

- Now uses :func:`~kombu.common.maybe_declare` to cache queue declarations.

- There is no longer a global default for the
  :setting:`CELERYBEAT_MAX_LOOP_INTERVAL` setting; it is instead
  set by individual schedulers.

- Worker: now truncates very long message bodies in error reports.

- No longer deepcopies exceptions when trying to serialize errors.

- The :envvar:`CELERY_BENCH` environment variable will now also list
  memory usage statistics at worker shutdown.

- Worker: now only ever uses a single timer for all timing needs,
  and instead sets different priorities.

- An exception's arguments are now safely pickled.

  Contributed by Matt Long.

- Worker/Celerybeat no longer log the startup banner.

  Previously it would be logged with severity warning,
  now it's only written to stdout.

- The ``contrib/`` directory in the distribution has been renamed to
  ``extra/``.

- celery.contrib.migrate: Many improvements, including
  filtering, queue migration, and support for acking messages on the broker
  being migrated from.

  Contributed by John Watson.

- Worker: Prefetch count increments are now optimized and grouped together.
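
Returning to the ``chunks(task, sequence, chunksize)`` entry above, a rough
usage sketch (``add`` is the usual example task; the ``proj.tasks`` module it
is imported from here is only a placeholder):

.. code-block:: python

    from proj.tasks import add   # hypothetical project module

    # Split ten add() calls into two chunks of five items each.
    res = add.chunks(zip(range(10), range(10)), 5)()
    res.get()
    # -> [[0, 2, 4, 6, 8], [10, 12, 14, 16, 18]]
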
+ +- Worker: No longer calls ``consume`` on the remote control command queue + twice. + + Probably didn't cause any problems, but was unecessary. + +Internals +--------- + +- ``app.broker_connection`` is now ``app.connection`` + + Both names still work. + +- Compat modules are now generated dynamically upon use. + + These modules are ``celery.messaging``, ``celery.log``, + ``celery.decorators`` and ``celery.registry``. + +- :mod:`celery.utils` refactored into multiple modules: + + :mod:`celery.utils.text` + :mod:`celery.utils.imports` + :mod:`celery.utils.functional` + +- Now using :mod:`kombu.utils.encoding` instead of + :mod:`celery.utils.encoding`. + +- Renamed module ``celery.routes`` -> :mod:`celery.app.routes`. + +- Renamed package ``celery.db`` -> :mod:`celery.backends.database`. + +- Renamed module ``celery.abstract`` -> :mod:`celery.worker.bootsteps`. + +- Command line docs are now parsed from the module docstrings. + +- Test suite directory has been reorganized. + +- :program:`setup.py` now reads docs from the :file:`requirements/` directory. + +- Celery commands no longer wraps output (Issue #700). + + Contributed by Thomas Johansson. + +.. _v300-experimental: + +Experimental +============ + +:mod:`celery.contrib.methods`: Task decorator for methods +---------------------------------------------------------- + +This is an experimental module containing a task +decorator, and a task decorator filter, that can be used +to create tasks out of methods:: + + from celery.contrib.methods import task_method + + class Counter(object): + + def __init__(self): + self.value = 1 + + @celery.task(name='Counter.increment', filter=task_method) + def increment(self, n=1): + self.value += 1 + return self.value + + +See :mod:`celery.contrib.methods` for more information. + +.. _v300-unscheduled-removals: + +Unscheduled Removals +==================== + +Usually we don't make backward incompatible removals, +but these removals should have no major effect. + +- The following settings have been renamed: + + - ``CELERYD_ETA_SCHEDULER`` -> ``CELERYD_TIMER`` + - ``CELERYD_ETA_SCHEDULER_PRECISION`` -> ``CELERYD_TIMER_PRECISION`` + +.. _v300-deprecations: + +Deprecations +============ + +See the :ref:`deprecation-timeline`. + +- The ``celery.backends.pyredis`` compat module has been removed. + + Use :mod:`celery.backends.redis` instead! + +- The following undocumented API's has been moved: + + - ``control.inspect.add_consumer`` -> :meth:`@control.add_consumer`. + - ``control.inspect.cancel_consumer`` -> :meth:`@control.cancel_consumer`. + - ``control.inspect.enable_events`` -> :meth:`@control.enable_events`. + - ``control.inspect.disable_events`` -> :meth:`@control.disable_events`. + + This way ``inspect()`` is only used for commands that do not + modify anything, while idempotent control commands that make changes + are on the control objects. + +Fixes +===== + +- Retry sqlalchemy backend operations on DatabaseError/OperationalError + (Issue #634) + +- Tasks that called ``retry`` was not acknowledged if acks late was enabled + + Fix contributed by David Markey. + +- The message priority argument was not properly propagated to Kombu + (Issue #708). + + Fix contributed by Eran Rundstein diff --git a/docs/whatsnew-3.1.rst b/docs/whatsnew-3.1.rst new file mode 100644 index 0000000..7decefe --- /dev/null +++ b/docs/whatsnew-3.1.rst @@ -0,0 +1,1269 @@ +.. 
_whatsnew-3.1: + +=========================================== + What's new in Celery 3.1 (Cipater) +=========================================== +:Author: Ask Solem (ask at celeryproject.org) + +.. sidebar:: Change history + + What's new documents describe the changes in major versions, + we also have a :ref:`changelog` that lists the changes in bugfix + releases (0.0.x), while older series are archived under the :ref:`history` + section. + +Celery is a simple, flexible and reliable distributed system to +process vast amounts of messages, while providing operations with +the tools required to maintain such a system. + +It's a task queue with focus on real-time processing, while also +supporting task scheduling. + +Celery has a large and diverse community of users and contributors, +you should come join us :ref:`on IRC ` +or :ref:`our mailing-list `. + +To read more about Celery you should go read the :ref:`introduction `. + +While this version is backward compatible with previous versions +it's important that you read the following section. + +This version is officially supported on CPython 2.6, 2.7 and 3.3, +and also supported on PyPy. + +.. _`website`: http://celeryproject.org/ + +.. topic:: Table of Contents + + Make sure you read the important notes before upgrading to this version. + +.. contents:: + :local: + :depth: 2 + +Preface +======= + +Deadlocks have long plagued our workers, and while uncommon they are +not acceptable. They are also infamous for being extremely hard to diagnose +and reproduce, so to make this job easier I wrote a stress test suite that +bombards the worker with different tasks in an attempt to break it. + +What happens if thousands of worker child processes are killed every +second? what if we also kill the broker connection every 10 +seconds? These are examples of what the stress test suite will do to the +worker, and it reruns these tests using different configuration combinations +to find edge case bugs. + +The end result was that I had to rewrite the prefork pool to avoid the use +of the POSIX semaphore. This was extremely challenging, but after +months of hard work the worker now finally passes the stress test suite. + +There's probably more bugs to find, but the good news is +that we now have a tool to reproduce them, so should you be so unlucky to +experience a bug then we'll write a test for it and squash it! + +Note that I have also moved many broker transports into experimental status: +the only transports recommended for production use today is RabbitMQ and +Redis. + +I don't have the resources to maintain all of them, so bugs are left +unresolved. I wish that someone will step up and take responsibility for +these transports or donate resources to improve them, but as the situation +is now I don't think the quality is up to date with the rest of the code-base +so I cannot recommend them for production use. + +The next version of Celery 3.2 will focus on performance and removing +rarely used parts of the library. Work has also started on a new message +protocol, supporting multiple languages and more. The initial draft can +be found :ref:`here `). + +The worker will emit a deprecation warning if you don't define this setting. + +.. topic:: for Kombu users + + Kombu 3.0 no longer accepts pickled messages by default, so if you + use Kombu directly then you have to configure your consumers: + see the :ref:`Kombu 3.0 Changelog ` for more + information. 
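
    A rough sketch of what that looks like for a standalone Kombu consumer
    (the queue, exchange and broker URL below are only placeholders); the
    ``accept`` argument whitelists the serializers the consumer is willing
    to decode:

    .. code-block:: python

        from kombu import Connection, Exchange, Queue, Consumer

        queue = Queue('example', Exchange('example'), routing_key='example')

        def on_message(body, message):
            print('Received: %r' % (body, ))
            message.ack()

        with Connection('amqp://guest@localhost//') as connection:
            # Only add 'pickle' if you trust every producer on the broker.
            with Consumer(connection, queues=[queue],
                          callbacks=[on_message],
                          accept=['json', 'pickle']):
                connection.drain_events(timeout=1)   # wait briefly for messages
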
+ +Old command-line programs removed and deprecated +------------------------------------------------ + +Everyone should move to the new :program:`celery` umbrella +command, so we are incrementally deprecating the old command names. + +In this version we've removed all commands that are not used +in init scripts. The rest will be removed in 3.2. + ++-------------------+--------------+-------------------------------------+ +| Program | New Status | Replacement | ++===================+==============+=====================================+ +| ``celeryd`` | *DEPRECATED* | :program:`celery worker` | ++-------------------+--------------+-------------------------------------+ +| ``celerybeat`` | *DEPRECATED* | :program:`celery beat` | ++-------------------+--------------+-------------------------------------+ +| ``celeryd-multi`` | *DEPRECATED* | :program:`celery multi` | ++-------------------+--------------+-------------------------------------+ +| ``celeryctl`` | **REMOVED** | :program:`celery inspect|control` | ++-------------------+--------------+-------------------------------------+ +| ``celeryev`` | **REMOVED** | :program:`celery events` | ++-------------------+--------------+-------------------------------------+ +| ``camqadm`` | **REMOVED** | :program:`celery amqp` | ++-------------------+--------------+-------------------------------------+ + +If this is not a new installation then you may want to remove the old +commands: + +.. code-block:: bash + + $ pip uninstall celery + $ # repeat until it fails + # ... + $ pip uninstall celery + $ pip install celery + +Please run :program:`celery --help` for help using the umbrella command. + +.. _v310-news: + +News +==== + +Prefork Pool Improvements +------------------------- + +These improvements are only active if you use an async capable +transport. This means only RabbitMQ (AMQP) and Redis are supported +at this point and other transports will still use the thread-based fallback +implementation. + +- Pool is now using one IPC queue per child process. + + Previously the pool shared one queue between all child processes, + using a POSIX semaphore as a mutex to achieve exclusive read and write + access. + + The POSIX semaphore has now been removed and each child process + gets a dedicated queue. This means that the worker will require more + file descriptors (two descriptors per process), but it also means + that performance is improved and we can send work to individual child + processes. + + POSIX semaphores are not released when a process is killed, so killing + processes could lead to a deadlock if it happened while the semaphore was + acquired. There is no good solution to fix this, so the best option + was to remove the semaphore. + +- Asynchronous write operations + + The pool now uses async I/O to send work to the child processes. + +- Lost process detection is now immediate. + + If a child process is killed or exits mysteriously the pool previously + had to wait for 30 seconds before marking the task with a + :exc:`~celery.exceptions.WorkerLostError`. It had to do this because + the outqueue was shared between all processes, and the pool could not + be certain whether the process completed the task or not. So an arbitrary + timeout of 30 seconds was chosen, as it was believed that the outqueue + would have been drained by this point. + + This timeout is no longer necessary, and so the task can be marked as + failed as soon as the pool gets the notification that the process exited. 
+ +- Rare race conditions fixed + + Most of these bugs were never reported to us, but was discovered while + running the new stress test suite. + +Caveats +~~~~~~~ + +.. topic:: Long running tasks + + The new pool will send tasks to a child process as long as the process + inqueue is writable, and since the socket is buffered this means + that the processes are, in effect, prefetching tasks. + + This benefits performance but it also means that other tasks may be stuck + waiting for a long running task to complete:: + + -> send T1 to Process A + # A executes T1 + -> send T2 to Process B + # B executes T2 + <- T2 complete + + -> send T3 to Process A + # A still executing T1, T3 stuck in local buffer and + # will not start until T1 returns + + The buffer size varies based on the operating system: some may + have a buffer as small as 64kb but on recent Linux versions the buffer + size is 1MB (can only be changed system wide). + + You can disable this prefetching behavior by enabling the :option:`-Ofair` + worker option: + + .. code-block:: bash + + $ celery -A proj worker -l info -Ofair + + With this option enabled the worker will only write to workers that are + available for work, disabling the prefetch behavior. + +.. topic:: Max tasks per child + + If a process exits and pool prefetch is enabled the worker may have + already written many tasks to the process inqueue, and these tasks + must then be moved back and rewritten to a new process. + + This is very expensive if you have ``--maxtasksperchild`` set to a low + value (e.g. less than 10), so if you need to enable this option + you should also enable ``-Ofair`` to turn off the prefetching behavior. + +Django supported out of the box +------------------------------- + +Celery 3.0 introduced a shiny new API, but unfortunately did not +have a solution for Django users. + +The situation changes with this version as Django is now supported +in core and new Django users coming to Celery are now expected +to use the new API directly. + +The Django community has a convention where there's a separate +django-x package for every library, acting like a bridge between +Django and the library. + +Having a separate project for Django users has been a pain for Celery, +with multiple issue trackers and multiple documentation +sources, and then lastly since 3.0 we even had different APIs. + +With this version we challenge that convention and Django users will +use the same library, the same API and the same documentation as +everyone else. + +There is no rush to port your existing code to use the new API, +but if you would like to experiment with it you should know that: + +- You need to use a Celery application instance. + + The new Celery API introduced in 3.0 requires users to instantiate the + library by creating an application: + + .. code-block:: python + + from celery import Celery + + app = Celery() + +- You need to explicitly integrate Celery with Django + + Celery will not automatically use the Django settings, so you can + either configure Celery separately or you can tell it to use the Django + settings with: + + .. code-block:: python + + app.config_from_object('django.conf:settings') + + Neither will it automatically traverse your installed apps to find task + modules, but this still available as an option you must enable: + + .. code-block:: python + + from django.conf import settings + app.autodiscover_tasks(settings.INSTALLED_APPS) + +- You no longer use ``manage.py`` + + Instead you use the :program:`celery` command directly: + + .. 
code-block:: bash + + celery -A proj worker -l info + + For this to work your app module must store the :envvar:`DJANGO_SETTINGS_MODULE` + environment variable, see the example in the :ref:`Django + guide `. + +To get started with the new API you should first read the :ref:`first-steps` +tutorial, and then you should read the Django specific instructions in +:ref:`django-first-steps`. + +The fixes and improvements applied by the django-celery library are now +automatically applied by core Celery when it detects that +the :envvar:`DJANGO_SETTINGS_MODULE` environment variable is set. + +The distribution ships with a new example project using Django +in :file:`examples/django`: + +http://github.com/celery/celery/tree/3.1/examples/django + +Some features still require the :mod:`django-celery` library: + + - Celery does not implement the Django database or cache result backends. + - Celery does not ship with the database-based periodic task + scheduler. + +.. note:: + + If you're still using the old API when you upgrade to Celery 3.1 + then you must make sure that your settings module contains + the ``djcelery.setup_loader()`` line, since this will + no longer happen as a side-effect of importing the :mod:`djcelery` + module. + + New users (or if you have ported to the new API) don't need the ``setup_loader`` + line anymore, and must make sure to remove it. + +Events are now ordered using logical time +----------------------------------------- + +Keeping physical clocks in perfect sync is impossible, so using +timestamps to order events in a distributed system is not reliable. + +Celery event messages have included a logical clock value for some time, +but starting with this version that field is also used to order them. + +Also, events now record timezone information +by including a new ``utcoffset`` field in the event message. +This is a signed integer telling the difference from UTC time in hours, +so e.g. an even sent from the Europe/London timezone in daylight savings +time will have an offset of 1. + +:class:`@events.Receiver` will automatically convert the timestamps +to the local timezone. + +.. note:: + + The logical clock is synchronized with other nodes + in the same cluster (neighbors), so this means that the logical + epoch will start at the point when the first worker in the cluster + starts. + + If all of the workers are shutdown the clock value will be lost + and reset to 0, to protect against this you should specify + a :option:`--statedb` so that the worker can persist the clock + value at shutdown. + + You may notice that the logical clock is an integer value and + increases very rapidly. Do not worry about the value overflowing + though, as even in the most busy clusters it may take several + millennia before the clock exceeds a 64 bits value. + +New worker node name format (``name@host``) +------------------------------------------- + +Node names are now constructed by two elements: name and hostname separated by '@'. + +This change was made to more easily identify multiple instances running +on the same machine. + +If a custom name is not specified then the +worker will use the name 'celery' by default, resulting in a +fully qualified node name of 'celery@hostname': + +.. code-block:: bash + + $ celery worker -n example.com + celery@example.com + +To also set the name you must include the @: + +.. 
code-block:: bash + + $ celery worker -n worker1@example.com + worker1@example.com + +The worker will identify itself using the fully qualified +node name in events and broadcast messages, so where before +a worker would identify itself as 'worker1.example.com', it will now +use 'celery@worker1.example.com'. + +Remember that the ``-n`` argument also supports simple variable +substitutions, so if the current hostname is *george.example.com* +then the ``%h`` macro will expand into that: + +.. code-block:: bash + + $ celery worker -n worker1@%h + worker1@george.example.com + +The available substitutions are as follows: + ++---------------+---------------------------------------+ +| Variable | Substitution | ++===============+=======================================+ +| ``%h`` | Full hostname (including domain name) | ++---------------+---------------------------------------+ +| ``%d`` | Domain name only | ++---------------+---------------------------------------+ +| ``%n`` | Hostname only (without domain name) | ++---------------+---------------------------------------+ +| ``%%`` | The character ``%`` | ++---------------+---------------------------------------+ + +Bound tasks +----------- + +The task decorator can now create "bound tasks", which means that the +task will receive the ``self`` argument. + +.. code-block:: python + + @app.task(bind=True) + def send_twitter_status(self, oauth, tweet): + try: + twitter = Twitter(oauth) + twitter.update_status(tweet) + except (Twitter.FailWhaleError, Twitter.LoginError) as exc: + raise self.retry(exc=exc) + +Using *bound tasks* is now the recommended approach whenever +you need access to the task instance or request context. +Previously one would have to refer to the name of the task +instead (``send_twitter_status.retry``), but this could lead to problems +in some configurations. + +Mingle: Worker synchronization +------------------------------ + +The worker will now attempt to synchronize with other workers in +the same cluster. + +Synchronized data currently includes revoked tasks and logical clock. + +This only happens at startup and causes a one second startup delay +to collect broadcast responses from other workers. + +You can disable this bootstep using the ``--without-mingle`` argument. + +Gossip: Worker <-> Worker communication +--------------------------------------- + +Workers are now passively subscribing to worker related events like +heartbeats. + +This means that a worker knows what other workers are doing and +can detect if they go offline. Currently this is only used for clock +synchronization, but there are many possibilities for future additions +and you can write extensions that take advantage of this already. + +Some ideas include consensus protocols, reroute task to best worker (based on +resource usage or data locality) or restarting workers when they crash. + +We believe that this is a small addition but one that really opens +up for amazing possibilities. + +You can disable this bootstep using the ``--without-gossip`` argument. + +Bootsteps: Extending the worker +------------------------------- + +By writing bootsteps you can now easily extend the consumer part +of the worker to add additional features, like custom message consumers. + +The worker has been using bootsteps for some time, but these were never +documented. 
In this version the consumer part of the worker +has also been rewritten to use bootsteps and the new :ref:`guide-extending` +guide documents examples extending the worker, including adding +custom message consumers. + +See the :ref:`guide-extending` guide for more information. + +.. note:: + + Bootsteps written for older versions will not be compatible + with this version, as the API has changed significantly. + + The old API was experimental and internal but should you be so unlucky + to use it then please contact the mailing-list and we will help you port + the bootstep to the new API. + +New RPC result backend +---------------------- + +This new experimental version of the ``amqp`` result backend is a good +alternative to use in classical RPC scenarios, where the process that initiates +the task is always the process to retrieve the result. + +It uses Kombu to send and retrieve results, and each client +uses a unique queue for replies to be sent to. This avoids +the significant overhead of the original amqp result backend which creates +one queue per task. + +By default results sent using this backend will not persist, so they won't +survive a broker restart. You can enable +the :setting:`CELERY_RESULT_PERSISTENT` setting to change that. + +.. code-block:: python + + CELERY_RESULT_BACKEND = 'rpc' + CELERY_RESULT_PERSISTENT = True + +Note that chords are currently not supported by the RPC backend. + +Time limits can now be set by the client +---------------------------------------- + +Two new options have been added to the Calling API: ``time_limit`` and +``soft_time_limit``: + +.. code-block:: python + + >>> res = add.apply_async((2, 2), time_limit=10, soft_time_limit=8) + + >>> res = add.subtask((2, 2), time_limit=10, soft_time_limit=8).delay() + + >>> res = add.s(2, 2).set(time_limit=10, soft_time_limit=8).delay() + +Contributed by Mher Movsisyan. + +Redis: Broadcast messages and virtual hosts +------------------------------------------- + +Broadcast messages are currently seen by all virtual hosts when +using the Redis transport. You can now fix this by enabling a prefix to all channels +so that the messages are separated: + +.. code-block:: python + + BROKER_TRANSPORT_OPTIONS = {'fanout_prefix': True} + +Note that you'll not be able to communicate with workers running older +versions or workers that does not have this setting enabled. + +This setting will be the default in a future version. + +Related to Issue #1490. + +:mod:`pytz` replaces ``python-dateutil`` dependency +--------------------------------------------------- + +Celery no longer depends on the ``python-dateutil`` library, +but instead a new dependency on the :mod:`pytz` library was added. + +The :mod:`pytz` library was already recommended for accurate timezone support. + +This also means that dependencies are the same for both Python 2 and +Python 3, and that the :file:`requirements/default-py3k.txt` file has +been removed. + +Support for Setuptools extra requirements +----------------------------------------- + +Pip now supports the :mod:`setuptools` extra requirements format, +so we have removed the old bundles concept, and instead specify +setuptools extras. + +You install extras by specifying them inside brackets: + +.. code-block:: bash + + $ pip install celery[redis,mongodb] + +The above will install the dependencies for Redis and MongoDB. You can list +as many extras as you want. + + +.. warning:: + + You can't use the ``celery-with-*`` packages anymore, as these will not be + updated to use Celery 3.1. 
+ ++-------------+-------------------------+---------------------------+ +| Extension | Requirement entry | Type | ++=============+=========================+===========================+ +| Redis | ``celery[redis]`` | transport, result backend | ++-------------+-------------------------+---------------------------+ +| MongoDB | ``celery[mongodb]`` | transport, result backend | ++-------------+-------------------------+---------------------------+ +| CouchDB | ``celery[couchdb]`` | transport | ++-------------+-------------------------+---------------------------+ +| Beanstalk | ``celery[beanstalk]`` | transport | ++-------------+-------------------------+---------------------------+ +| ZeroMQ | ``celery[zeromq]`` | transport | ++-------------+-------------------------+---------------------------+ +| Zookeeper | ``celery[zookeeper]`` | transport | ++-------------+-------------------------+---------------------------+ +| SQLAlchemy | ``celery[sqlalchemy]`` | transport, result backend | ++-------------+-------------------------+---------------------------+ +| librabbitmq | ``celery[librabbitmq]`` | transport (C amqp client) | ++-------------+-------------------------+---------------------------+ + +The complete list with examples is found in the :ref:`bundles` section. + +``subtask.__call__()`` now executes the task directly +----------------------------------------------------- + +A misunderstanding led to ``Signature.__call__`` being an alias of +``.delay`` but this does not conform to the calling API of ``Task`` which +calls the underlying task method. + +This means that: + +.. code-block:: python + + @app.task + def add(x, y): + return x + y + + add.s(2, 2)() + +now does the same as calling the task directly: + +.. code-block:: python + + add(2, 2) + +In Other News +------------- + +- Now depends on :ref:`Kombu 3.0 `. + +- Now depends on :mod:`billiard` version 3.3. + +- Worker will now crash if running as the root user with pickle enabled. + +- Canvas: ``group.apply_async`` and ``chain.apply_async`` no longer starts + separate task. + + That the group and chord primitives supported the "calling API" like other + subtasks was a nice idea, but it was useless in practice and often + confused users. If you still want this behavior you can define a + task to do it for you. + +- New method ``Signature.freeze()`` can be used to "finalize" + signatures/subtask. + + Regular signature: + + .. code-block:: python + + >>> s = add.s(2, 2) + >>> result = s.freeze() + >>> result + + >>> s.delay() + + + Group: + + .. code-block:: python + + >>> g = group(add.s(2, 2), add.s(4, 4)) + >>> result = g.freeze() + + >>> g() + + +- Chord exception behavior defined (Issue #1172). + + From this version the chord callback will change state to FAILURE + when a task part of a chord raises an exception. + + See more at :ref:`chord-errors`. + +- New ability to specify additional command line options + to the worker and beat programs. + + The :attr:`@Celery.user_options` attribute can be used + to add additional command-line arguments, and expects + optparse-style options: + + .. code-block:: python + + from celery import Celery + from celery.bin import Option + + app = Celery() + app.user_options['worker'].add( + Option('--my-argument'), + ) + + See the :ref:`guide-extending` guide for more information. + +- All events now include a ``pid`` field, which is the process id of the + process that sent the event. 
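
  A minimal sketch of reading the new field with the standard event receiver
  (the broker URL here is only an example):

  .. code-block:: python

      from celery import Celery

      app = Celery(broker='amqp://guest@localhost//')

      def on_event(event):
          # Every event is a plain dict; 'pid' is new in this version.
          print('%s from pid %s' % (event['type'], event.get('pid')))

      with app.connection() as connection:
          recv = app.events.Receiver(connection, handlers={'*': on_event})
          recv.capture(limit=10)
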
+ +- Event heartbeats are now calculated based on the time when the event + was received by the monitor, and not the time reported by the worker. + + This means that a worker with an out-of-sync clock will no longer + show as 'Offline' in monitors. + + A warning is now emitted if the difference between the senders + time and the internal time is greater than 15 seconds, suggesting + that the clocks are out of sync. + +- Monotonic clock support. + + A monotonic clock is now used for timeouts and scheduling. + + The monotonic clock function is built-in starting from Python 3.4, + but we also have fallback implementations for Linux and OS X. + +- :program:`celery worker` now supports a ``--detach`` argument to start + the worker as a daemon in the background. + +- :class:`@events.Receiver` now sets a ``local_received`` field for incoming + events, which is set to the time of when the event was received. + +- :class:`@events.Dispatcher` now accepts a ``groups`` argument + which decides a white-list of event groups that will be sent. + + The type of an event is a string separated by '-', where the part + before the first '-' is the group. Currently there are only + two groups: ``worker`` and ``task``. + + A dispatcher instantiated as follows: + + .. code-block:: python + + app.events.Dispatcher(connection, groups=['worker']) + + will only send worker related events and silently drop any attempts + to send events related to any other group. + +- New :setting:`BROKER_FAILOVER_STRATEGY` setting. + + This setting can be used to change the transport failover strategy, + can either be a callable returning an iterable or the name of a + Kombu built-in failover strategy. Default is "round-robin". + + Contributed by Matt Wise. + +- ``Result.revoke`` will no longer wait for replies. + + You can add the ``reply=True`` argument if you really want to wait for + responses from the workers. + +- Better support for link and link_error tasks for chords. + + Contributed by Steeve Morin. + +- Worker: Now emits warning if the :setting:`CELERYD_POOL` setting is set + to enable the eventlet/gevent pools. + + The `-P` option should always be used to select the eventlet/gevent pool + to ensure that the patches are applied as early as possible. + + If you start the worker in a wrapper (like Django's manage.py) + then you must apply the patches manually, e.g. by creating an alternative + wrapper that monkey patches at the start of the program before importing + any other modules. + +- There's a now an 'inspect clock' command which will collect the current + logical clock value from workers. + +- `celery inspect stats` now contains the process id of the worker's main + process. + + Contributed by Mher Movsisyan. + +- New remote control command to dump a workers configuration. + + Example: + + .. code-block:: bash + + $ celery inspect conf + + Configuration values will be converted to values supported by JSON + where possible. + + Contributed by Mher Movisyan. + +- New settings :setting:`CELERY_EVENT_QUEUE_TTL` and + :setting:`CELERY_EVENT_QUEUE_EXPIRES`. + + These control when a monitors event queue is deleted, and for how long + events published to that queue will be visible. Only supported on + RabbitMQ. + +- New Couchbase result backend. + + This result backend enables you to store and retrieve task results + using `Couchbase`_. + + See :ref:`conf-couchbase-result-backend` for more information + about configuring this result backend. + + Contributed by Alain Masiero. + + .. 
_`Couchbase`: http://www.couchbase.com + +- CentOS init script now supports starting multiple worker instances. + + See the script header for details. + + Contributed by Jonathan Jordan. + +- ``AsyncResult.iter_native`` now sets default interval parameter to 0.5 + + Fix contributed by Idan Kamara + +- New setting :setting:`BROKER_LOGIN_METHOD`. + + This setting can be used to specify an alternate login method + for the AMQP transports. + + Contributed by Adrien Guinet + +- The ``dump_conf`` remote control command will now give the string + representation for types that are not JSON compatible. + +- Function `celery.security.setup_security` is now :func:`@setup_security`. + +- Task retry now propagates the message expiry value (Issue #980). + + The value is forwarded at is, so the expiry time will not change. + To update the expiry time you would have to pass a new expires + argument to ``retry()``. + +- Worker now crashes if a channel error occurs. + + Channel errors are transport specific and is the list of exceptions + returned by ``Connection.channel_errors``. + For RabbitMQ this means that Celery will crash if the equivalence + checks for one of the queues in :setting:`CELERY_QUEUES` mismatches, which + makes sense since this is a scenario where manual intervention is + required. + +- Calling ``AsyncResult.get()`` on a chain now propagates errors for previous + tasks (Issue #1014). + +- The parent attribute of ``AsyncResult`` is now reconstructed when using JSON + serialization (Issue #1014). + +- Worker disconnection logs are now logged with severity warning instead of + error. + + Contributed by Chris Adams. + +- ``events.State`` no longer crashes when it receives unknown event types. + +- SQLAlchemy Result Backend: New :setting:`CELERY_RESULT_DB_TABLENAMES` + setting can be used to change the name of the database tables used. + + Contributed by Ryan Petrello. + +- SQLAlchemy Result Backend: Now calls ``enginge.dispose`` after fork + (Issue #1564). + + If you create your own sqlalchemy engines then you must also + make sure that these are closed after fork in the worker: + + .. code-block:: python + + from multiprocessing.util import register_after_fork + + engine = create_engine(…) + register_after_fork(engine, engine.dispose) + +- A stress test suite for the Celery worker has been written. + + This is located in the ``funtests/stress`` directory in the git + repository. There's a README file there to get you started. + +- The logger named ``celery.concurrency`` has been renamed to ``celery.pool``. + +- New command line utility ``celery graph``. + + This utility creates graphs in GraphViz dot format. + + You can create graphs from the currently installed bootsteps: + + .. code-block:: bash + + # Create graph of currently installed bootsteps in both the worker + # and consumer namespaces. + $ celery graph bootsteps | dot -T png -o steps.png + + # Graph of the consumer namespace only. + $ celery graph bootsteps consumer | dot -T png -o consumer_only.png + + # Graph of the worker namespace only. + $ celery graph bootsteps worker | dot -T png -o worker_only.png + + Or graphs of workers in a cluster: + + .. 
code-block:: bash + + # Create graph from the current cluster + $ celery graph workers | dot -T png -o workers.png + + # Create graph from a specified list of workers + $ celery graph workers nodes:w1,w2,w3 | dot -T png workers.png + + # also specify the number of threads in each worker + $ celery graph workers nodes:w1,w2,w3 threads:2,4,6 + + # …also specify the broker and backend URLs shown in the graph + $ celery graph workers broker:amqp:// backend:redis:// + + # …also specify the max number of workers/threads shown (wmax/tmax), + # enumerating anything that exceeds that number. + $ celery graph workers wmax:10 tmax:3 + +- Changed the way that app instances are pickled. + + Apps can now define a ``__reduce_keys__`` method that is used instead + of the old ``AppPickler`` attribute. E.g. if your app defines a custom + 'foo' attribute that needs to be preserved when pickling you can define + a ``__reduce_keys__`` as such: + + .. code-block:: python + + import celery + + class Celery(celery.Celery): + + def __init__(self, *args, **kwargs): + super(Celery, self).__init__(*args, **kwargs) + self.foo = kwargs.get('foo') + + def __reduce_keys__(self): + return super(Celery, self).__reduce_keys__().update( + foo=self.foo, + ) + + This is a much more convenient way to add support for pickling custom + attributes. The old ``AppPickler`` is still supported but its use is + discouraged and we would like to remove it in a future version. + +- Ability to trace imports for debugging purposes. + + The :envvar:`C_IMPDEBUG` can be set to trace imports as they + occur: + + .. code-block:: bash + + $ C_IMDEBUG=1 celery worker -l info + + .. code-block:: bash + + $ C_IMPDEBUG=1 celery shell + +- Message headers now available as part of the task request. + + Example adding and retrieving a header value: + + .. code-block:: python + + @app.task(bind=True) + def t(self): + return self.request.headers.get('sender') + + >>> t.apply_async(headers={'sender': 'George Costanza'}) + +- New :signal:`before_task_publish` signal dispatched before a task message + is sent and can be used to modify the final message fields (Issue #1281). + +- New :signal:`after_task_publish` signal replaces the old :signal:`task_sent` + signal. + + The :signal:`task_sent` signal is now deprecated and should not be used. + +- New :signal:`worker_process_shutdown` signal is dispatched in the + prefork pool child processes as they exit. + + Contributed by Daniel M Taub. + +- ``celery.platforms.PIDFile`` renamed to :class:`celery.platforms.Pidfile`. + +- MongoDB Backend: Can now be configured using an URL: + + See :ref:`example-mongodb-result-config`. + +- MongoDB Backend: No longer using deprecated ``pymongo.Connection``. + +- MongoDB Backend: Now disables ``auto_start_request``. + +- MongoDB Backend: Now enables ``use_greenlets`` when eventlet/gevent is used. + +- ``subtask()`` / ``maybe_subtask()`` renamed to + ``signature()``/``maybe_signature()``. + + Aliases still available for backwards compatibility. + +- The ``correlation_id`` message property is now automatically set to the + id of the task. + +- The task message ``eta`` and ``expires`` fields now includes timezone + information. + +- All result backends ``store_result``/``mark_as_*`` methods must now accept + a ``request`` keyword argument. + +- Events now emit warning if the broken ``yajl`` library is used. + +- The :signal:`celeryd_init` signal now takes an extra keyword argument: + ``option``. 
+ + This is the mapping of parsed command line arguments, and can be used to + prepare new preload arguments (``app.user_options['preload']``). + +- New callback: ``Celery.on_configure``. + + This callback is called when an app is about to be configured (a + configuration key is required). + +- Worker: No longer forks on :sig:`HUP`. + + This means that the worker will reuse the same pid for better + support with external process supervisors. + + Contributed by Jameel Al-Aziz. + +- Worker: The log message ``Got task from broker …`` was changed to + ``Received task …``. + +- Worker: The log message ``Skipping revoked task …`` was changed + to ``Discarding revoked task …``. + +- Optimization: Improved performance of ``ResultSet.join_native()``. + + Contributed by Stas Rudakou. + +- The :signal:`task_revoked` signal now accepts new ``request`` argument + (Issue #1555). + + The revoked signal is dispatched after the task request is removed from + the stack, so it must instead use the :class:`~celery.worker.job.Request` + object to get information about the task. + +- Worker: New :option:`-X` command line argument to exclude queues + (Issue #1399). + + The :option:`-X` argument is the inverse of the :option:`-Q` argument + and accepts a list of queues to exclude (not consume from): + + .. code-block:: bash + + # Consume from all queues in CELERY_QUEUES, but not the 'foo' queue. + $ celery worker -A proj -l info -X foo + +- Adds :envvar:`C_FAKEFORK` envvar for simple init script/multi debugging. + + This means that you can now do: + + .. code-block:: bash + + $ C_FAKEFORK=1 celery multi start 10 + + or: + + .. code-block:: bash + + $ C_FAKEFORK=1 /etc/init.d/celeryd start + + to avoid the daemonization step to see errors that are not visible + due to missing stdout/stderr. + + A ``dryrun`` command has been added to the generic init script that + enables this option. + +- New public API to push and pop from the current task stack: + + :func:`celery.app.push_current_task` and + :func:`celery.app.pop_current_task``. + +- ``RetryTaskError`` has been renamed to :exc:`~celery.exceptions.Retry`. + + The old name is still available for backwards compatibility. + +- New semi-predicate exception :exc:`~celery.exceptions.Reject`. + + This exception can be raised to ``reject``/``requeue`` the task message, + see :ref:`task-semipred-reject` for examples. + +- :ref:`Semipredicates ` documented: (Retry/Ignore/Reject). + + +.. _v310-removals: + +Scheduled Removals +================== + +- The ``BROKER_INSIST`` setting and the ``insist`` argument + to ``~@connection`` is no longer supported. + +- The ``CELERY_AMQP_TASK_RESULT_CONNECTION_MAX`` setting is no longer + supported. + + Use :setting:`BROKER_POOL_LIMIT` instead. + +- The ``CELERY_TASK_ERROR_WHITELIST`` setting is no longer supported. + + You should set the :class:`~celery.utils.mail.ErrorMail` attribute + of the task class instead. You can also do this using + :setting:`CELERY_ANNOTATIONS`: + + .. code-block:: python + + from celery import Celery + from celery.utils.mail import ErrorMail + + class MyErrorMail(ErrorMail): + whitelist = (KeyError, ImportError) + + def should_send(self, context, exc): + return isinstance(exc, self.whitelist) + + app = Celery() + app.conf.CELERY_ANNOTATIONS = { + '*': { + 'ErrorMail': MyErrorMails, + } + } + +- Functions that creates a broker connections no longer + supports the ``connect_timeout`` argument. + + This can now only be set using the :setting:`BROKER_CONNECTION_TIMEOUT` + setting. 
This is because functions no longer create connections + directly, but instead get them from the connection pool. + +- The ``CELERY_AMQP_TASK_RESULT_EXPIRES`` setting is no longer supported. + + Use :setting:`CELERY_TASK_RESULT_EXPIRES` instead. + +.. _v310-deprecations: + +Deprecations +============ + +See the :ref:`deprecation-timeline`. + +.. _v310-fixes: + +Fixes +===== + +- AMQP Backend: join did not convert exceptions when using the json + serializer. + +- Non-abstract task classes are now shared between apps (Issue #1150). + + Note that non-abstract task classes should not be used in the + new API. You should only create custom task classes when you + use them as a base class in the ``@task`` decorator. + + This fix ensure backwards compatibility with older Celery versions + so that non-abstract task classes works even if a module is imported + multiple times so that the app is also instantiated multiple times. + +- Worker: Workaround for Unicode errors in logs (Issue #427). + +- Task methods: ``.apply_async`` now works properly if args list is None + (Issue #1459). + +- Eventlet/gevent/solo/threads pools now properly handles :exc:`BaseException` + errors raised by tasks. + +- :control:`autoscale` and :control:`pool_grow`/:control:`pool_shrink` remote + control commands will now also automatically increase and decrease the + consumer prefetch count. + + Fix contributed by Daniel M. Taub. + +- ``celery control pool_`` commands did not coerce string arguments to int. + +- Redis/Cache chords: Callback result is now set to failure if the group + disappeared from the database (Issue #1094). + +- Worker: Now makes sure that the shutdown process is not initiated multiple + times. + +- Multi: Now properly handles both ``-f`` and ``--logfile`` options + (Issue #1541). + +.. _v310-internal: + +Internal changes +================ + +- Module ``celery.task.trace`` has been renamed to :mod:`celery.app.trace`. + +- Module ``celery.concurrency.processes`` has been renamed to + :mod:`celery.concurrency.prefork`. + +- Classes that no longer fall back to using the default app: + + - Result backends (:class:`celery.backends.base.BaseBackend`) + - :class:`celery.worker.WorkController` + - :class:`celery.worker.Consumer` + - :class:`celery.worker.job.Request` + + This means that you have to pass a specific app when instantiating + these classes. + +- ``EventDispatcher.copy_buffer`` renamed to + :meth:`@events.Dispatcher.extend_buffer`. + +- Removed unused and never documented global instance + ``celery.events.state.state``. + +- :class:`@events.Receiver` is now a :class:`kombu.mixins.ConsumerMixin` + subclass. + +- :class:`celery.apps.worker.Worker` has been refactored as a subclass of + :class:`celery.worker.WorkController`. + + This removes a lot of duplicate functionality. + +- The ``Celery.with_default_connection`` method has been removed in favor + of ``with app.connection_or_acquire``. + +- The ``celery.results.BaseDictBackend`` class has been removed and is replaced by + :class:`celery.results.BaseBackend`. diff --git a/examples/README.rst b/examples/README.rst new file mode 100644 index 0000000..f4b817a --- /dev/null +++ b/examples/README.rst @@ -0,0 +1,18 @@ +================= + Celery Examples +================= + + +* pythonproject + +Example Python project using celery. + +* httpexample + +Example project using remote tasks (webhook tasks) + +* celery_http_gateway + +Example HTTP service exposing the ability to apply tasks and query the +resulting status/return value. 
+ diff --git a/examples/app/myapp.py b/examples/app/myapp.py new file mode 100644 index 0000000..51a624b --- /dev/null +++ b/examples/app/myapp.py @@ -0,0 +1,39 @@ +"""myapp.py + +Usage: + + (window1)$ python myapp.py worker -l info + + (window2)$ python + >>> from myapp import add + >>> add.delay(16, 16).get() + 32 + + +You can also specify the app to use with the `celery` command, +using the `-A` / `--app` option:: + + $ celery -A myapp worker -l info + +With the `-A myproj` argument the program will search for an app +instance in the module ``myproj``. You can also specify an explicit +name using the fully qualified form:: + + $ celery -A myapp:app worker -l info + +""" +from celery import Celery + +app = Celery( + 'myapp', + broker='amqp://guest@localhost//', + # add result backend here if needed. + #backend='rpc' +) + +@app.task() +def add(x, y): + return x + y + +if __name__ == '__main__': + app.start() diff --git a/examples/celery_http_gateway/README.rst b/examples/celery_http_gateway/README.rst new file mode 100644 index 0000000..9b19639 --- /dev/null +++ b/examples/celery_http_gateway/README.rst @@ -0,0 +1,40 @@ +============================== + Example Celery->HTTP Gateway +============================== + +This is an example service exposing the ability to apply tasks and query +statuses/results over HTTP. + +Some familiarity with Django is recommended. + +`settings.py` contains the celery settings, you probably want to configure +at least the broker related settings. + +To run the service you have to run the following commands:: + + $ python manage.py syncdb # (if running the database backend) + + $ python manage.py runserver + + +The service is now running at http://localhost:8000 + + +You can apply tasks, with the `/apply/` URL:: + + $ curl http://localhost:8000/apply/celery.ping/ + {"ok": "true", "task_id": "e3a95109-afcd-4e54-a341-16c18fddf64b"} + +Then you can use the resulting task-id to get the return value:: + + $ curl http://localhost:8000/e3a95109-afcd-4e54-a341-16c18fddf64b/status/ + {"task": {"status": "SUCCESS", "result": "pong", "id": "e3a95109-afcd-4e54-a341-16c18fddf64b"}} + + +If you don't want to expose all tasks there are a few possible +approaches. For instance you can extend the `apply` view to only +accept a whitelist. Another possibility is to just make views for every task you want to +expose. We made on such view for ping in `views.ping`:: + + $ curl http://localhost:8000/ping/ + {"ok": "true", "task_id": "383c902c-ba07-436b-b0f3-ea09cc22107c"} diff --git a/examples/celery_http_gateway/__init__.py b/examples/celery_http_gateway/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/celery_http_gateway/manage.py b/examples/celery_http_gateway/manage.py new file mode 100644 index 0000000..45f284b --- /dev/null +++ b/examples/celery_http_gateway/manage.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python +from django.core.management import execute_manager +try: + import settings # Assumed to be in the same directory. +except ImportError: + import sys + sys.stderr.write( + "Error: Can't find the file 'settings.py' in the directory " + "containing {0!r}.".format(__file__)) + sys.exit(1) + +if __name__ == '__main__': + execute_manager(settings) diff --git a/examples/celery_http_gateway/settings.py b/examples/celery_http_gateway/settings.py new file mode 100644 index 0000000..750f18a --- /dev/null +++ b/examples/celery_http_gateway/settings.py @@ -0,0 +1,98 @@ +# Django settings for celery_http_gateway project. 
+ +import django + +DEBUG = True +TEMPLATE_DEBUG = DEBUG + +CARROT_BACKEND = 'amqp' +CELERY_RESULT_BACKEND = 'database' +BROKER_URL = 'amqp://guest:guest@localhost:5672//' + +ADMINS = ( + # ('Your Name', 'your_email@domain.com'), +) + +MANAGERS = ADMINS + +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': 'development.db', + 'USER': '', + 'PASSWORD': '', + 'HOST': '', + 'PORT': '', + } +} + +if django.VERSION[:3] < (1, 3): + DATABASE_ENGINE = DATABASES['default']['ENGINE'] + DATABASE_NAME = DATABASES['default']['NAME'] + DATABASE_USER = DATABASES['default']['USER'] + DATABASE_PASSWORD = DATABASES['default']['PASSWORD'] + DATABASE_HOST = DATABASES['default']['HOST'] + DATABASE_PORT = DATABASES['default']['PORT'] + +# Local time zone for this installation. Choices can be found here: +# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name +# although not all choices may be available on all operating systems. +# If running in a Windows environment this must be set to the same as your +# system time zone. +TIME_ZONE = 'America/Chicago' + +# Language code for this installation. All choices can be found here: +# http://www.i18nguy.com/unicode/language-identifiers.html +LANGUAGE_CODE = 'en-us' + +SITE_ID = 1 + +# If you set this to False, Django will make some optimizations so as not +# to load the internationalization machinery. +USE_I18N = True + +# Absolute path to the directory that holds media. +# Example: "/home/media/media.lawrence.com/" +MEDIA_ROOT = '' + +# URL that handles the media served from MEDIA_ROOT. Make sure to use a +# trailing slash if there is a path component (optional in other cases). +# Examples: "http://media.lawrence.com", "http://example.com/media/" +MEDIA_URL = '' + +# URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a +# trailing slash. +# Examples: "http://foo.com/media/", "/media/". +ADMIN_MEDIA_PREFIX = '/media/' + +# Make this unique, and don't share it with anybody. +SECRET_KEY = '#1i=edpk55k3781$z-p%b#dbn&n+-rtt83pgz2o9o)v8g7(owq' + +# List of callables that know how to import templates from various sources. +TEMPLATE_LOADERS = ( + 'django.template.loaders.filesystem.load_template_source', + 'django.template.loaders.app_directories.load_template_source', +) + +MIDDLEWARE_CLASSES = ( + 'django.middleware.common.CommonMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', +) + +ROOT_URLCONF = 'celery_http_gateway.urls' + +TEMPLATE_DIRS = ( + # Put strings here, like "/home/html/django_templates" or + # "C:/www/django/templates". + # Always use forward slashes, even on Windows. + # Don't forget to use absolute paths, not relative paths. 
+) + +INSTALLED_APPS = ( + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.sites', + 'djcelery', +) diff --git a/examples/celery_http_gateway/tasks.py b/examples/celery_http_gateway/tasks.py new file mode 100644 index 0000000..c5bcd61 --- /dev/null +++ b/examples/celery_http_gateway/tasks.py @@ -0,0 +1,6 @@ +from celery import task + + +@task() +def hello_world(to='world'): + return 'Hello {0}'.format(to) diff --git a/examples/celery_http_gateway/urls.py b/examples/celery_http_gateway/urls.py new file mode 100644 index 0000000..f99136d --- /dev/null +++ b/examples/celery_http_gateway/urls.py @@ -0,0 +1,21 @@ +from django.conf.urls.defaults import ( # noqa + url, patterns, include, handler404, handler500, +) + +from djcelery import views as celery_views + +from celery_http_gateway.tasks import hello_world + +# Uncomment the next two lines to enable the admin: +# from django.contrib import admin +# admin.autodiscover() + +urlpatterns = patterns( + '', + url(r'^apply/(?P.+?)/', celery_views.apply), + url(r'^hello/', celery_views.task_view(hello_world)), + url(r'^(?P[\w\d\-]+)/done/?$', celery_views.is_task_successful, + name='celery-is_task_successful'), + url(r'^(?P[\w\d\-]+)/status/?$', celery_views.task_status, + name='celery-task_status'), +) diff --git a/examples/django/README.rst b/examples/django/README.rst new file mode 100644 index 0000000..9eebc02 --- /dev/null +++ b/examples/django/README.rst @@ -0,0 +1,36 @@ +============================================================== + Example Django project using Celery +============================================================== + +Contents +======== + +``proj/`` +--------- + +This is the project iself, created using +``django-admin.py startproject proj``, and then the settings module +(``proj/settings.py``) was modified to add ``demoapp`` to +``INSTALLED_APPS`` + +``proj/celery.py`` +---------- + +This module contains the Celery application instance for this project, +we take configuration from Django settings and use ``autodiscover_tasks`` to +find task modules inside all packages listed in ``INSTALLED_APPS``. + +``demoapp/`` +------------ + +Example generic app. This is decoupled from the rest of the project by using +the ``@shared_task`` decorator. This decorator returns a proxy that always +points to the currently active Celery instance. + + +Starting the worker +=================== + +.. code-block:: bash + + $ celery -A proj worker -l info diff --git a/examples/django/demoapp/__init__.py b/examples/django/demoapp/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/django/demoapp/models.py b/examples/django/demoapp/models.py new file mode 100644 index 0000000..9d57c55 --- /dev/null +++ b/examples/django/demoapp/models.py @@ -0,0 +1,3 @@ +from django.db import models # noqa + +# Create your models here. 
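# Illustrative note (not part of the original example): with the worker from
# the README running, the tasks defined in demoapp/tasks.py can be tried from
# a Django shell, e.g.:
#
#     >>> from demoapp.tasks import add
#     >>> add.delay(2, 2)      # returns an AsyncResult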
diff --git a/examples/django/demoapp/tasks.py b/examples/django/demoapp/tasks.py new file mode 100644 index 0000000..2af031e --- /dev/null +++ b/examples/django/demoapp/tasks.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import + +from celery import shared_task + + +@shared_task +def add(x, y): + return x + y + + +@shared_task +def mul(x, y): + return x * y + + +@shared_task +def xsum(numbers): + return sum(numbers) diff --git a/examples/django/demoapp/tests.py b/examples/django/demoapp/tests.py new file mode 100644 index 0000000..501deb7 --- /dev/null +++ b/examples/django/demoapp/tests.py @@ -0,0 +1,16 @@ +""" +This file demonstrates writing tests using the unittest module. These will pass +when you run "manage.py test". + +Replace this with more appropriate tests for your application. +""" + +from django.test import TestCase + + +class SimpleTest(TestCase): + def test_basic_addition(self): + """ + Tests that 1 + 1 always equals 2. + """ + self.assertEqual(1 + 1, 2) diff --git a/examples/django/demoapp/views.py b/examples/django/demoapp/views.py new file mode 100644 index 0000000..60f00ef --- /dev/null +++ b/examples/django/demoapp/views.py @@ -0,0 +1 @@ +# Create your views here. diff --git a/examples/django/manage.py b/examples/django/manage.py new file mode 100644 index 0000000..a8fd787 --- /dev/null +++ b/examples/django/manage.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python +import os +import sys + +if __name__ == "__main__": + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "proj.settings") + + from django.core.management import execute_from_command_line + + execute_from_command_line(sys.argv) diff --git a/examples/django/proj/__init__.py b/examples/django/proj/__init__.py new file mode 100644 index 0000000..b64e43e --- /dev/null +++ b/examples/django/proj/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import + +# This will make sure the app is always imported when +# Django starts so that shared_task will use this app. +from .celery import app as celery_app diff --git a/examples/django/proj/celery.py b/examples/django/proj/celery.py new file mode 100644 index 0000000..aebb108 --- /dev/null +++ b/examples/django/proj/celery.py @@ -0,0 +1,22 @@ +from __future__ import absolute_import + +import os + +from celery import Celery + +from django.conf import settings + +# set the default Django settings module for the 'celery' program. +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'proj.settings') + +app = Celery('proj') + +# Using a string here means the worker will not have to +# pickle the object when using Windows. +app.config_from_object('django.conf:settings') +app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) + + +@app.task(bind=True) +def debug_task(self): + print('Request: {0!r}'.format(self.request)) diff --git a/examples/django/proj/settings.py b/examples/django/proj/settings.py new file mode 100644 index 0000000..fe2beec --- /dev/null +++ b/examples/django/proj/settings.py @@ -0,0 +1,170 @@ +from __future__ import absolute_import +# ^^^ The above is required if you want to import from the celery +# library. If you don't have this then `from celery.schedules import` +# becomes `proj.celery.schedules` in Python 2.x since it allows +# for relative imports by default. 
+ +# Celery settings + +BROKER_URL = 'amqp://guest:guest@localhost//' + +#: Only add pickle to this list if your broker is secured +#: from unwanted access (see userguide/security.html) +CELERY_ACCEPT_CONTENT = ['json'] +CELERY_TASK_SERIALIZER = 'json' +CELERY_RESULT_SERIALIZER = 'json' + +# Django settings for proj project. + +DEBUG = True +TEMPLATE_DEBUG = DEBUG + +ADMINS = ( + # ('Your Name', 'your_email@example.com'), +) + +MANAGERS = ADMINS + +DATABASES = { + 'default': { + # Add 'postgresql_psycopg2', 'mysql', 'sqlite3' or 'oracle'. + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': 'test.db', # path to database file if using sqlite3. + 'USER': '', # Not used with sqlite3. + 'PASSWORD': '', # Not used with sqlite3. + 'HOST': '', # Set to empty string for localhost. + # Not used with sqlite3. + 'PORT': '', # Set to empty string for default. + # Not used with sqlite3. + } +} + +# Local time zone for this installation. Choices can be found here: +# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name +# although not all choices may be available on all operating systems. +# In a Windows environment this must be set to your system time zone. +TIME_ZONE = 'America/Chicago' + +# Language code for this installation. All choices can be found here: +# http://www.i18nguy.com/unicode/language-identifiers.html +LANGUAGE_CODE = 'en-us' + +SITE_ID = 1 + +# If you set this to False, Django will make some optimizations so as not +# to load the internationalization machinery. +USE_I18N = True + +# If you set this to False, Django will not format dates, numbers and +# calendars according to the current locale. +USE_L10N = True + +# If you set this to False, Django will not use timezone-aware datetimes. +USE_TZ = True + +# Absolute filesystem path to the directory that will hold user-uploaded files. +# Example: "/home/media/media.lawrence.com/media/" +MEDIA_ROOT = '' + +# URL that handles the media served from MEDIA_ROOT. Make sure to use a +# trailing slash. +# Examples: "http://media.lawrence.com/media/", "http://example.com/media/" +MEDIA_URL = '' + +# Absolute path to the directory static files should be collected to. +# Don't put anything in this directory yourself; store your static files +# in apps' "static/" subdirectories and in STATICFILES_DIRS. +# Example: "/home/media/media.lawrence.com/static/" +STATIC_ROOT = '' + +# URL prefix for static files. +# Example: "http://media.lawrence.com/static/" +STATIC_URL = '/static/' + +# Additional locations of static files +STATICFILES_DIRS = ( + # Put strings here, like "/home/html/static" or "C:/www/django/static". + # Always use forward slashes, even on Windows. + # Don't forget to use absolute paths, not relative paths. +) + +# List of finder classes that know how to find static files in +# various locations. +STATICFILES_FINDERS = ( + 'django.contrib.staticfiles.finders.FileSystemFinder', + 'django.contrib.staticfiles.finders.AppDirectoriesFinder', +) + +# Make this unique, and don't share it with anybody. +SECRET_KEY = 'x2$s&0z2xehpnt_99i8q3)4)t*5q@+n(+6jrqz4@rt%a8fdf+!' + +# List of callables that know how to import templates from various sources. 
+TEMPLATE_LOADERS = ( + 'django.template.loaders.filesystem.Loader', + 'django.template.loaders.app_directories.Loader', +) + +MIDDLEWARE_CLASSES = ( + 'django.middleware.common.CommonMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + # Uncomment the next line for simple clickjacking protection: + # 'django.middleware.clickjacking.XFrameOptionsMiddleware', +) + +ROOT_URLCONF = 'proj.urls' + +# Python dotted path to the WSGI application used by Django's runserver. +WSGI_APPLICATION = 'proj.wsgi.application' + +TEMPLATE_DIRS = ( + # Put strings here, like "/home/html/django_templates" + # or "C:/www/django/templates". + # Always use forward slashes, even on Windows. + # Don't forget to use absolute paths, not relative paths. +) + +INSTALLED_APPS = ( + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.sites', + 'django.contrib.messages', + 'django.contrib.staticfiles', + 'demoapp', + # Uncomment the next line to enable the admin: + # 'django.contrib.admin', + # Uncomment the next line to enable admin documentation: + # 'django.contrib.admindocs', +) + +# A sample logging configuration. The only tangible logging +# performed by this configuration is to send an email to +# the site admins on every HTTP 500 error when DEBUG=False. +# See http://docs.djangoproject.com/en/dev/topics/logging for +# more details on how to customize your logging configuration. +LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'filters': { + 'require_debug_false': { + '()': 'django.utils.log.RequireDebugFalse' + } + }, + 'handlers': { + 'mail_admins': { + 'level': 'ERROR', + 'filters': ['require_debug_false'], + 'class': 'django.utils.log.AdminEmailHandler' + } + }, + 'loggers': { + 'django.request': { + 'handlers': ['mail_admins'], + 'level': 'ERROR', + 'propagate': True, + }, + } +} diff --git a/examples/django/proj/urls.py b/examples/django/proj/urls.py new file mode 100644 index 0000000..f991d65 --- /dev/null +++ b/examples/django/proj/urls.py @@ -0,0 +1,20 @@ +from django.conf.urls import ( # noqa + patterns, include, url, handler404, handler500, +) + +# Uncomment the next two lines to enable the admin: +# from django.contrib import admin +# admin.autodiscover() + +urlpatterns = patterns( + '', + # Examples: + # url(r'^$', 'proj.views.home', name='home'), + # url(r'^proj/', include('proj.foo.urls')), + + # Uncomment the admin/doc line below to enable admin documentation: + # url(r'^admin/doc/', include('django.contrib.admindocs.urls')), + + # Uncomment the next line to enable the admin: + # url(r'^admin/', include(admin.site.urls)), +) diff --git a/examples/django/proj/wsgi.py b/examples/django/proj/wsgi.py new file mode 100644 index 0000000..446fcc9 --- /dev/null +++ b/examples/django/proj/wsgi.py @@ -0,0 +1,28 @@ +""" +WSGI config for proj project. + +This module contains the WSGI application used by Django's development server +and any production WSGI deployments. It should expose a module-level variable +named ``application``. Django's ``runserver`` and ``runfcgi`` commands discover +this application via the ``WSGI_APPLICATION`` setting. + +Usually you will have the standard Django WSGI application here, but it also +might make sense to replace the whole Django WSGI application with a custom one +that later delegates to the Django one. 
For example, you could introduce WSGI +middleware here, or combine a Django application with an application of another +framework. + +""" +import os + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "proj.settings") + +# This application object is used by any WSGI server configured to use this +# file. This includes Django's development server, if the WSGI_APPLICATION +# setting points here. +from django.core.wsgi import get_wsgi_application +application = get_wsgi_application() + +# Apply WSGI middleware here. +# from helloworld.wsgi import HelloWorldApplication +# application = HelloWorldApplication(application) diff --git a/examples/eventlet/README.rst b/examples/eventlet/README.rst new file mode 100644 index 0000000..6bf00e9 --- /dev/null +++ b/examples/eventlet/README.rst @@ -0,0 +1,55 @@ +================================== + Example using the Eventlet Pool +================================== + +Introduction +============ + +This is a Celery application containing two example tasks. + +First you need to install Eventlet, and also recommended is the `dnspython` +module (when this is installed all name lookups will be asynchronous):: + + $ pip install eventlet + $ pip install dnspython + $ pip install requests + +Before you run any of the example tasks you need to start +the worker:: + + $ cd examples/eventlet + $ celery worker -l info --concurrency=500 --pool=eventlet + +As usual you need to have RabbitMQ running, see the Celery getting started +guide if you haven't installed it yet. + +Tasks +===== + +* `tasks.urlopen` + +This task simply makes a request opening the URL and returns the size +of the response body:: + + $ cd examples/eventlet + $ python + >>> from tasks import urlopen + >>> urlopen.delay("http://www.google.com/").get() + 9980 + +To open several URLs at once you can do:: + + $ cd examples/eventlet + $ python + >>> from tasks import urlopen + >>> from celery import group + >>> result = group(urlopen.s(url) + ... for url in LIST_OF_URLS).apply_async() + >>> for incoming_result in result.iter_native(): + ... print(incoming_result, ) + +* `webcrawler.crawl` + +This is a simple recursive web crawler. It will only crawl +URLs for the current host name. Please see comments in the +`webcrawler.py` file. 
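The ``bulk_task_producer.py`` module in the next hunk ships without a usage example. A minimal sketch of how its ``ProducerPool`` might be driven, assuming the ``tasks.urlopen`` task from this directory, a reachable broker, and placeholder URLs:

.. code-block:: python

    # Hypothetical driver for ProducerPool; run from examples/eventlet.
    from bulk_task_producer import ProducerPool
    from tasks import urlopen

    pool = ProducerPool(size=10)
    receipts = [pool.apply_async(urlopen, (url,), {})
                for url in ['http://www.google.com/', 'http://example.com/']]
    for receipt in receipts:
        receipt.wait(timeout=30)  # waits until the task message has been published
        print(receipt.result)     # the AsyncResult returned by apply_async

Note that ``Receipt.wait`` only waits for the message to be sent; to wait for the task result itself you would still call ``receipt.result.get()``.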
diff --git a/examples/eventlet/bulk_task_producer.py b/examples/eventlet/bulk_task_producer.py new file mode 100644 index 0000000..2002160 --- /dev/null +++ b/examples/eventlet/bulk_task_producer.py @@ -0,0 +1,60 @@ + +from eventlet import spawn_n, monkey_patch, Timeout +from eventlet.queue import LightQueue +from eventlet.event import Event + +from celery import current_app + +monkey_patch() + + +class Receipt(object): + result = None + + def __init__(self, callback=None): + self.callback = callback + self.ready = Event() + + def finished(self, result): + self.result = result + if self.callback: + self.callback(result) + self.ready.send() + + def wait(self, timeout=None): + with Timeout(timeout): + return self.ready.wait() + + +class ProducerPool(object): + Receipt = Receipt + + def __init__(self, size=20): + self.size = size + self.inqueue = LightQueue() + self._running = None + self._producers = None + + def apply_async(self, task, args, kwargs, callback=None, **options): + if self._running is None: + self._running = spawn_n(self._run) + receipt = self.Receipt(callback) + self.inqueue.put((task, args, kwargs, options, receipt)) + return receipt + + def _run(self): + self._producers = [ + spawn_n(self._producer) for _ in range(self.size) + ] + + def _producer(self): + connection = current_app.connection() + publisher = current_app.amqp.TaskProducer(connection) + inqueue = self.inqueue + + while 1: + task, args, kwargs, options, receipt = inqueue.get() + result = task.apply_async(args, kwargs, + publisher=publisher, + **options) + receipt.finished(result) diff --git a/examples/eventlet/celeryconfig.py b/examples/eventlet/celeryconfig.py new file mode 100644 index 0000000..a816c00 --- /dev/null +++ b/examples/eventlet/celeryconfig.py @@ -0,0 +1,14 @@ +import os +import sys +sys.path.insert(0, os.getcwd()) + +## Start worker with -P eventlet +# Never use the CELERYD_POOL setting as that will patch +# the worker too late. + +BROKER_URL = 'amqp://guest:guest@localhost:5672//' +CELERY_DISABLE_RATE_LIMITS = True +CELERY_RESULT_BACKEND = 'amqp' +CELERY_TASK_RESULT_EXPIRES = 30 * 60 + +CELERY_IMPORTS = ('tasks', 'webcrawler') diff --git a/examples/eventlet/tasks.py b/examples/eventlet/tasks.py new file mode 100644 index 0000000..af32adb --- /dev/null +++ b/examples/eventlet/tasks.py @@ -0,0 +1,14 @@ +import requests + +from celery import task + + +@task() +def urlopen(url): + print('Opening: {0}'.format(url)) + try: + response = requests.get(url) + except Exception as exc: + print('URL {0} gave error: {1!r}'.format(url, exc)) + return None + return len(response.text) diff --git a/examples/eventlet/webcrawler.py b/examples/eventlet/webcrawler.py new file mode 100644 index 0000000..a8328b6 --- /dev/null +++ b/examples/eventlet/webcrawler.py @@ -0,0 +1,71 @@ +"""Recursive webcrawler example. + +For asynchronous DNS lookups install the `dnspython` package: + + $ pip install dnspython + +Requires the `pybloom` module for the bloom filter which is used +to ensure a lower chance of recrawling a URL previously seen. + +Since the bloom filter is not shared, but only passed as an argument +to each subtask, it would be much better to have this as a centralized +service. Redis sets could also be a practical solution. + +A BloomFilter with a capacity of 100_000 members and an error rate +of 0.001 is 2.8MB pickled, but if compressed with zlib it only takes +up 2.9kB(!). + +We don't have to do compression manually, just set the task's compression +to "zlib", and the serializer to "pickle".
+ + +""" + + +import re + +try: + from urllib.parse import urlsplit +except ImportError: + from urlparse import urlsplit # noqa + +import requests + +from celery import task, group +from eventlet import Timeout + +from pybloom import BloomFilter + +# http://daringfireball.net/2009/11/liberal_regex_for_matching_urls +url_regex = re.compile( + r'\b(([\w-]+://?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/)))') + + +def domain(url): + """Return the domain part of an URL.""" + return urlsplit(url)[1].split(':')[0] + + +@task(ignore_result=True, serializer='pickle', compression='zlib') +def crawl(url, seen=None): + print('crawling: {0}'.format(url)) + if not seen: + seen = BloomFilter(capacity=50000, error_rate=0.0001) + + with Timeout(5, False): + try: + response = requests.get(url) + except Exception: + return + + location = domain(url) + wanted_urls = [] + for url_match in url_regex.finditer(response.text): + url = url_match.group(0) + # To not destroy the internet, we only fetch URLs on the same domain. + if url not in seen and location in domain(url): + wanted_urls.append(url) + seen.add(url) + + subtasks = group(crawl.s(url, seen) for url in wanted_urls) + subtasks() diff --git a/examples/gevent/celeryconfig.py b/examples/gevent/celeryconfig.py new file mode 100644 index 0000000..36d6a6c --- /dev/null +++ b/examples/gevent/celeryconfig.py @@ -0,0 +1,13 @@ +import os +import sys +sys.path.insert(0, os.getcwd()) + +### Note: Start worker with -P gevent, +# do not use the CELERYD_POOL option. + +BROKER_URL = 'amqp://guest:guest@localhost:5672//' +CELERY_DISABLE_RATE_LIMITS = True +CELERY_RESULT_BACKEND = 'amqp' +CELERY_TASK_RESULT_EXPIRES = 30 * 60 + +CELERY_IMPORTS = ('tasks', ) diff --git a/examples/gevent/tasks.py b/examples/gevent/tasks.py new file mode 100644 index 0000000..7b5624d --- /dev/null +++ b/examples/gevent/tasks.py @@ -0,0 +1,15 @@ +import requests + +from celery import task + + +@task(ignore_result=True) +def urlopen(url): + print('Opening: {0}'.format(url)) + try: + requests.get(url) + except Exception as exc: + print('Exception for {0}: {1!r}'.format(url, exc)) + return url, 0 + print('Done with: {0}'.format(url)) + return url, 1 diff --git a/examples/httpexample/README.rst b/examples/httpexample/README.rst new file mode 100644 index 0000000..e7ad392 --- /dev/null +++ b/examples/httpexample/README.rst @@ -0,0 +1,33 @@ +====================== + Webhook Task Example +====================== + +This example is a simple Django HTTP service exposing a single task +multiplying two numbers: + +The multiply http callback task is in `views.py`, mapped to a URL using +`urls.py`. + +There are no models, so to start it do:: + + $ python manage.py runserver + +To execute the task you could use curl:: + + $ curl http://localhost:8000/multiply?x=10&y=10 + +which then gives the expected JSON response:: + + {"status": "success": "retval": 100} + + +To execute this http callback task asynchronously you could fire up +a python shell with a properly configured celery and do: + + >>> from celery.task.http import URL + >>> res = URL("http://localhost:8000/multiply").get_async(x=10, y=10) + >>> res.wait() + 100 + + +That's all! 
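Besides the ``celery.task.http.URL`` client shown in the README above, the webhook can be exercised with any plain HTTP client. A minimal sketch using only the standard library, assuming the development server from the README is running on ``localhost:8000``:

.. code-block:: python

    # Hypothetical client for the multiply view defined below in views.py.
    import json
    try:
        from urllib.request import urlopen   # Python 3
    except ImportError:
        from urllib2 import urlopen          # Python 2

    reply = urlopen('http://localhost:8000/multiply/?x=10&y=10')
    payload = json.loads(reply.read().decode('utf-8'))
    print(payload)  # -> {'status': 'success', 'retval': 100}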
diff --git a/examples/httpexample/__init__.py b/examples/httpexample/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/httpexample/manage.py b/examples/httpexample/manage.py new file mode 100644 index 0000000..3cf8fe5 --- /dev/null +++ b/examples/httpexample/manage.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python +from django.core.management import execute_manager +try: + from . import settings # Assumed to be in the same directory. +except ImportError: + import sys + sys.stderr.write( + "Error: Can't find the file 'settings.py' in the directory " + "containing {0!r}.".format(__file__)) + sys.exit(1) + +if __name__ == '__main__': + execute_manager(settings) diff --git a/examples/httpexample/settings.py b/examples/httpexample/settings.py new file mode 100644 index 0000000..650dff3 --- /dev/null +++ b/examples/httpexample/settings.py @@ -0,0 +1,89 @@ +# Django settings for httpexample project. + +DEBUG = True +TEMPLATE_DEBUG = DEBUG + +ADMINS = ( + # ('Your Name', 'your_email@domain.com'), +) + +MANAGERS = ADMINS +# 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'. +DATABASE_ENGINE = '' + +# Pth to database file if using sqlite3. +DATABASE_NAME = '' + +# Not used with sqlite3. +DATABASE_USER = '' + +# Not used with sqlite3. +DATABASE_PASSWORD = '' + +# Set to empty string for localhost. Not used with sqlite3. +DATABASE_HOST = '' + +# Set to empty string for default. Not used with sqlite3. +DATABASE_PORT = '' + +# Local time zone for this installation. Choices can be found here: +# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name +# although not all choices may be available on all operating systems. +# If running in a Windows environment this must be set to the same as your +# system time zone. +TIME_ZONE = 'America/Chicago' + +# Language code for this installation. All choices can be found here: +# http://www.i18nguy.com/unicode/language-identifiers.html +LANGUAGE_CODE = 'en-us' + +SITE_ID = 1 + +# If you set this to False, Django will make some optimizations so as not +# to load the internationalization machinery. +USE_I18N = True + +# Absolute path to the directory that holds media. +# Example: "/home/media/media.lawrence.com/" +MEDIA_ROOT = '' + +# URL that handles the media served from MEDIA_ROOT. Make sure to use a +# trailing slash if there is a path component (optional in other cases). +# Examples: "http://media.lawrence.com", "http://example.com/media/" +MEDIA_URL = '' + +# URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a +# trailing slash. +# Examples: "http://foo.com/media/", "/media/". +ADMIN_MEDIA_PREFIX = '/media/' + +# Make this unique, and don't share it with anybody. +SECRET_KEY = 'p^@q$@nal#-0+w@v_3bcj2ug(zbh5_m2on8^kkn&!e!b=a@o__' + +# List of callables that know how to import templates from various sources. +TEMPLATE_LOADERS = ( + 'django.template.loaders.filesystem.load_template_source', + 'django.template.loaders.app_directories.load_template_source', +) + +MIDDLEWARE_CLASSES = ( + 'django.middleware.common.CommonMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', +) + +ROOT_URLCONF = 'httpexample.urls' + +TEMPLATE_DIRS = ( + # Put strings here, like "/home/html/django_templates" or + # "C:/www/django/templates". + # Always use forward slashes, even on Windows. + # Don't forget to use absolute paths, not relative paths. 
+) + +INSTALLED_APPS = ( + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.sites', +) diff --git a/examples/httpexample/urls.py b/examples/httpexample/urls.py new file mode 100644 index 0000000..ccdc2f2 --- /dev/null +++ b/examples/httpexample/urls.py @@ -0,0 +1,13 @@ +from django.conf.urls.defaults import ( # noqa + url, patterns, include, handler500, handler404, +) +from . import views + +# Uncomment the next two lines to enable the admin: +# from django.contrib import admin +# admin.autodiscover() + +urlpatterns = patterns( + '', + url(r'^multiply/', views.multiply, name='multiply'), +) diff --git a/examples/httpexample/views.py b/examples/httpexample/views.py new file mode 100644 index 0000000..5069255 --- /dev/null +++ b/examples/httpexample/views.py @@ -0,0 +1,12 @@ +from django.http import HttpResponse + +from anyjson import dumps + + +def multiply(request): + x = int(request.GET['x']) + y = int(request.GET['y']) + + retval = x * y + response = {'status': 'success', 'retval': retval} + return HttpResponse(dumps(response), mimetype='application/json') diff --git a/examples/next-steps/proj/__init__.py b/examples/next-steps/proj/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/next-steps/proj/celery.py b/examples/next-steps/proj/celery.py new file mode 100644 index 0000000..db98708 --- /dev/null +++ b/examples/next-steps/proj/celery.py @@ -0,0 +1,16 @@ +from __future__ import absolute_import + +from celery import Celery + +app = Celery('proj', + broker='amqp://', + backend='amqp://', + include=['proj.tasks']) + +# Optional configuration, see the application user guide. +app.conf.update( + CELERY_TASK_RESULT_EXPIRES=3600, +) + +if __name__ == '__main__': + app.start() diff --git a/examples/next-steps/proj/tasks.py b/examples/next-steps/proj/tasks.py new file mode 100644 index 0000000..b69ac96 --- /dev/null +++ b/examples/next-steps/proj/tasks.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import + +from proj.celery import app + + +@app.task +def add(x, y): + return x + y + + +@app.task +def mul(x, y): + return x * y + + +@app.task +def xsum(numbers): + return sum(numbers) diff --git a/examples/next-steps/setup.py b/examples/next-steps/setup.py new file mode 100644 index 0000000..7eaccf9 --- /dev/null +++ b/examples/next-steps/setup.py @@ -0,0 +1,20 @@ +""" +Example setup file for a project using Celery. + +This can be used to distribute your tasks and worker +as a Python package, on PyPI or on your own private package index. 
+ +""" +from setuptools import setup, find_packages + +setup( + name='example-tasks', + version='1.0', + description='Tasks for my project', + packages=find_packages(exclude=['ez_setup', 'tests', 'tests.*']), + zip_safe=False, + install_requires=[ + 'celery>=3.0', + #'requests', + ], +) diff --git a/examples/resultgraph/tasks.py b/examples/resultgraph/tasks.py new file mode 100644 index 0000000..bb14d27 --- /dev/null +++ b/examples/resultgraph/tasks.py @@ -0,0 +1,103 @@ +# Example:: +# >>> R = A.apply_async() +# >>> list(joinall(R)) +# [['A 0', 'A 1', 'A 2', 'A 3', 'A 4', 'A 5', 'A 6', 'A 7', 'A 8', 'A 9'], +# ['B 0', 'B 1', 'B 2', 'B 3', 'B 4', 'B 5', 'B 6', 'B 7', 'B 8', 'B 9'], +# ['C 0', 'C 1', 'C 2', 'C 3', 'C 4', 'C 5', 'C 6', 'C 7', 'C 8', 'C 9'], +# ['D 0', 'D 1', 'D 2', 'D 3', 'D 4', 'D 5', 'D 6', 'D 7', 'D 8', 'D 9'], +# ['E 0', 'E 1', 'E 2', 'E 3', 'E 4', 'E 5', 'E 6', 'E 7', 'E 8', 'E 9'], +# ['F 0', 'F 1', 'F 2', 'F 3', 'F 4', 'F 5', 'F 6', 'F 7', 'F 8', 'F 9'], +# ['G 0', 'G 1', 'G 2', 'G 3', 'G 4', 'G 5', 'G 6', 'G 7', 'G 8', 'G 9'], +# ['H 0', 'H 1', 'H 2', 'H 3', 'H 4', 'H 5', 'H 6', 'H 7', 'H 8', 'H 9']] +# +# +# Joining the graph asynchronously with a callback +# (Note: only two levels, the deps are considered final +# when the second task is ready.) +# +# >>> unlock_graph.apply_async((A.apply_async(), +# ... A_callback.subtask()), countdown=1) + + +from celery import chord, group, task, signature, uuid +from celery.result import AsyncResult, ResultSet, allow_join_result +from collections import deque + + +@task() +def add(x, y): + return x + y + + +@task() +def make_request(id, url): + print('GET {0!r}'.format(url)) + return url + + +@task() +def B_callback(urls, id): + print('batch {0} done'.format(id)) + return urls + + +@task() +def B(id): + return chord( + make_request.s(id, '{0} {1!r}'.format(id, i)) + for i in range(10) + )(B_callback.s(id)) + + +@task() +def A(): + return group(B.s(c) for c in 'ABCDEFGH').apply_async() + + +def joinall(R, timeout=None, propagate=True): + stack = deque([R]) + + try: + use_native = joinall.backend.supports_native_join + except AttributeError: + use_native = False + + while stack: + res = stack.popleft() + if isinstance(res, ResultSet): + j = res.join_native if use_native else res.join + stack.extend(j(timeout=timeout, propagate=propagate)) + elif isinstance(res, AsyncResult): + stack.append(res.get(timeout=timeout, propagate=propagate)) + else: + yield res + + +@task() +def unlock_graph(result, callback, + interval=1, propagate=False, max_retries=None): + if result.ready(): + second_level_res = result.get() + if second_level_res.ready(): + with allow_join_result(): + signature(callback).delay(list(joinall( + second_level_res, propagate=propagate))) + else: + unlock_graph.retry(countdown=interval, max_retries=max_retries) + + +@task() +def A_callback(res): + print('Everything is done: {0!r}'.format(res)) + return res + + +class chord2(object): + + def __init__(self, tasks, **options): + self.tasks = tasks + self.options = options + + def __call__(self, body, **options): + body.options.setdefault('task_id', uuid()) + unlock_graph.apply_async() diff --git a/examples/tutorial/tasks.py b/examples/tutorial/tasks.py new file mode 100644 index 0000000..7b9d648 --- /dev/null +++ b/examples/tutorial/tasks.py @@ -0,0 +1,13 @@ +from __future__ import absolute_import + +from celery import Celery + +app = Celery('tasks', broker='amqp://') + + +@app.task() +def add(x, y): + return x + y + +if __name__ == '__main__': + app.start() diff --git 
a/extra/bash-completion/celery.bash b/extra/bash-completion/celery.bash new file mode 100644 index 0000000..985caf0 --- /dev/null +++ b/extra/bash-completion/celery.bash @@ -0,0 +1,129 @@ +# This is a bash completion script for celery +# Redirect it to a file, then source it or copy it to /etc/bash_completion.d +# to get tab completion. celery must be on your PATH for this to work. +_celery() +{ + local cur basep opts base kval kkey loglevels prevp in_opt controlargs + local pools + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + prevp="${COMP_WORDS[COMP_CWORD-1]}" + basep="${COMP_WORDS[1]}" + opts="worker events beat shell multi amqp status + inspect control purge list migrate call result report" + fargs="--app= --broker= --loader= --config= --version" + dopts="--detach --umask= --gid= --uid= --pidfile= --logfile= --loglevel=" + controlargs="--timeout --destination" + pools="prefork eventlet gevent threads solo" + loglevels="critical error warning info debug" + in_opt=0 + + # find the current subcommand, store in basep' + for index in $(seq 1 $((${#COMP_WORDS[@]} - 2))) + do + basep=${COMP_WORDS[$index]} + if [ "${basep:0:2}" != "--" ]; then + break; + fi + done + + if [ "${cur:0:2}" == "--" -a "$cur" != "${cur//=}" ]; then + in_opt=1 + kkey="${cur%=*}" + kval="${cur#*=}" + elif [ "${prevp:0:1}" == "-" ]; then + in_opt=1 + kkey="$prevp" + kval="$cur" + fi + + if [ $in_opt -eq 1 ]; then + case "${kkey}" in + --uid|-u) + COMPREPLY=( $(compgen -u -- "$kval") ) + return 0 + ;; + --gid|-g) + COMPREPLY=( $(compgen -g -- "$kval") ) + return 0 + ;; + --pidfile|--logfile|-p|-f|--statedb|-S|-s|--schedule-filename) + COMPREPLY=( $(compgen -f -- "$kval") ) + return 0 + ;; + --workdir) + COMPREPLY=( $(compgen -d -- "$kval") ) + return 0 + ;; + --loglevel|-l) + COMPREPLY=( $(compgen -W "$loglevels" -- "$kval") ) + return 0 + ;; + --pool|-P) + COMPREPLY=( $(compgen -W "$pools" -- "$kval") ) + return 0 + ;; + *) + ;; + esac + fi + + case "${basep}" in + worker) + COMPREPLY=( $(compgen -W '--concurrency= --pool= --purge --logfile= + --loglevel= --hostname= --beat --schedule= --scheduler= --statedb= --events + --time-limit= --soft-time-limit= --maxtasksperchild= --queues= + --include= --pidfile= --autoscale= --autoreload --no-execv $fargs' -- ${cur} ) ) + return 0 + ;; + inspect) + COMPREPLY=( $(compgen -W 'active active_queues ping registered report + reserved revoked scheduled stats --help $controlargs $fargs' -- ${cur}) ) + return 0 + ;; + control) + COMPREPLY=( $(compgen -W 'add_consumer autoscale cancel_consumer + disable_events enable_events pool_grow pool_shrink + rate_limit time_limit --help $controlargs $fargs' -- ${cur}) ) + return 0 + ;; + multi) + COMPREPLY=( $(compgen -W 'start restart stopwait stop show + kill names expand get help --quiet --nosplash + --verbose --no-color --help $fargs' -- ${cur} ) ) + return 0 + ;; + amqp) + COMPREPLY=( $(compgen -W 'queue.declare queue.purge exchange.delete + basic.publish exchange.declare queue.delete queue.bind + basic.get --help $fargs' -- ${cur} )) + return 0 + ;; + list) + COMPREPLY=( $(compgen -W 'bindings $fargs' -- ${cur} ) ) + return 0 + ;; + shell) + COMPREPLY=( $(compgen -W '--ipython --bpython --python + --without-tasks --eventlet --gevent $fargs' -- ${cur} ) ) + return 0 + ;; + beat) + COMPREPLY=( $(compgen -W '--schedule= --scheduler= + --max-interval= $dopts $fargs' -- ${cur} )) + return 0 + ;; + events) + COMPREPLY=( $(compgen -W '--dump --camera= --freq= + --maxrate= $dopts $fargs' -- ${cur})) + return 0 + ;; + *) + ;; + esac + + 
COMPREPLY=($(compgen -W "${opts} ${fargs}" -- ${cur})) + return 0 +} +complete -F _celery celery + diff --git a/extra/centos/celeryd b/extra/centos/celeryd new file mode 100644 index 0000000..879a99f --- /dev/null +++ b/extra/centos/celeryd @@ -0,0 +1,265 @@ +#!/bin/sh +# ============================================ +# celeryd - Starts the Celery worker daemon. +# ============================================ +# +# :Usage: /etc/init.d/celeryd {start|stop|restart|status} +# :Configuration file: /etc/sysconfig/celeryd +# +# See http://docs.celeryproject.org/en/latest/tutorials/daemonizing.html + +### BEGIN INIT INFO +# Provides: celeryd +# Required-Start: $network $local_fs $remote_fs +# Required-Stop: $network $local_fs $remote_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: celery task worker daemon +### END INIT INFO +# +# +# To implement separate init scripts, do NOT copy this script. Instead, +# symlink it. I.e., if my new application, "little-worker" needs an init, I +# should just use: +# +# ln -s /etc/init.d/celeryd /etc/init.d/little-worker +# +# You can then configure this by manipulating /etc/sysconfig/little-worker. +# +# Setting `prog` here allows you to symlink this init script, making it easy +# to run multiple processes on the system. + +# If we're invoked via SysV-style runlevel scripts we need to follow the +# link from rcX.d before working out the script name. +if [[ `dirname $0` == /etc/rc*.d ]]; then + target="$(readlink $0)" +else + target=$0 +fi + +prog="$(basename $target)" + +# Source the centos service helper functions +source /etc/init.d/functions +# NOTE: "set -e" does not work with the above functions, +# which use non-zero return codes as non-error return conditions + +# some commands work asyncronously, so we'll wait this many seconds +SLEEP_SECONDS=5 + +DEFAULT_PID_FILE="/var/run/celery/$prog-%n.pid" +DEFAULT_LOG_FILE="/var/log/celery/$prog-%n.log" +DEFAULT_LOG_LEVEL="INFO" +DEFAULT_NODES="celery" +DEFAULT_CELERYD="-m celery.bin.celeryd_detach" + +CELERY_DEFAULTS=${CELERY_DEFAULTS:-"/etc/sysconfig/$prog"} + +test -f "$CELERY_DEFAULTS" && . "$CELERY_DEFAULTS" + +# Set CELERY_CREATE_DIRS to always create log/pid dirs. +CELERY_CREATE_DIRS=${CELERY_CREATE_DIRS:-0} +CELERY_CREATE_RUNDIR=$CELERY_CREATE_DIRS +CELERY_CREATE_LOGDIR=$CELERY_CREATE_DIRS +if [ -z "$CELERYD_PID_FILE" ]; then + CELERYD_PID_FILE="$DEFAULT_PID_FILE" + CELERY_CREATE_RUNDIR=1 +fi +if [ -z "$CELERYD_LOG_FILE" ]; then + CELERYD_LOG_FILE="$DEFAULT_LOG_FILE" + CELERY_CREATE_LOGDIR=1 +fi + +CELERYD_LOG_LEVEL=${CELERYD_LOG_LEVEL:-${CELERYD_LOGLEVEL:-$DEFAULT_LOG_LEVEL}} +CELERYD_MULTI=${CELERYD_MULTI:-"celeryd-multi"} +CELERYD=${CELERYD:-$DEFAULT_CELERYD} +CELERYD_NODES=${CELERYD_NODES:-$DEFAULT_NODES} + +# This is used to change how Celery loads in the configs. It does not need to +# be set to be run. +export CELERY_LOADER + +if [ -n "$2" ]; then + CELERYD_OPTS="$CELERYD_OPTS $2" +fi + +CELERYD_LOG_DIR=`dirname $CELERYD_LOG_FILE` +CELERYD_PID_DIR=`dirname $CELERYD_PID_FILE` + +# Extra start-stop-daemon options, like user/group. +if [ -n "$CELERYD_USER" ]; then + DAEMON_OPTS="$DAEMON_OPTS --uid=$CELERYD_USER" +fi +if [ -n "$CELERYD_GROUP" ]; then + DAEMON_OPTS="$DAEMON_OPTS --gid=$CELERYD_GROUP" +fi + +if [ -n "$CELERYD_CHDIR" ]; then + DAEMON_OPTS="$DAEMON_OPTS --workdir=$CELERYD_CHDIR" +fi + +check_dev_null() { + if [ ! -c /dev/null ]; then + echo "/dev/null is not a character device!" + exit 75 # EX_TEMPFAIL + fi +} + + +maybe_die() { + if [ $? 
-ne 0 ]; then + echo "Exiting: $* (errno $?)" + exit 77 # EX_NOPERM + fi +} + +create_default_dir() { + if [ ! -d "$1" ]; then + echo "- Creating default directory: '$1'" + mkdir -p "$1" + maybe_die "Couldn't create directory $1" + echo "- Changing permissions of '$1' to 02755" + chmod 02755 "$1" + maybe_die "Couldn't change permissions for $1" + if [ -n "$CELERYD_USER" ]; then + echo "- Changing owner of '$1' to '$CELERYD_USER'" + chown "$CELERYD_USER" "$1" + maybe_die "Couldn't change owner of $1" + fi + if [ -n "$CELERYD_GROUP" ]; then + echo "- Changing group of '$1' to '$CELERYD_GROUP'" + chgrp "$CELERYD_GROUP" "$1" + maybe_die "Couldn't change group of $1" + fi + fi +} + + +check_paths() { + if [ $CELERY_CREATE_LOGDIR -eq 1 ]; then + create_default_dir "$CELERYD_LOG_DIR" + fi + if [ $CELERY_CREATE_RUNDIR -eq 1 ]; then + create_default_dir "$CELERYD_PID_DIR" + fi +} + +create_paths() { + create_default_dir "$CELERYD_LOG_DIR" + create_default_dir "$CELERYD_PID_DIR" +} + +export PATH="${PATH:+$PATH:}/usr/sbin:/sbin" + + +_get_pid_files() { + [[ ! -d "$CELERYD_PID_DIR" ]] && return + echo $(ls -1 "$CELERYD_PID_DIR"/$prog-*.pid 2> /dev/null) +} + +stop() { + local pid_files=$(_get_pid_files) + [[ -z "$pid_files" ]] && echo "$prog is stopped" && return 0 + + local one_failed= + for pid_file in $pid_files; do + local pid=$(cat "$pid_file") + echo -n $"Stopping $prog (pid $pid): " + + # killproc comes from 'functions' and brings three nice features: + # 1. sending TERM, sleeping, then sleeping more if needed, then sending KILL + # 2. handling 'success' and 'failure' output + # 3. removes stale pid files, if any remain + killproc -p "$pid_file" -d "$SLEEP_SECONDS" $prog || one_failed=true + echo + done + + [[ "$one_failed" ]] && return 1 || return 0 +} + +start() { + echo -n $"Starting $prog: " + + # If Celery is already running, bail out + local pid_files=$(_get_pid_files) + if [[ "$pid_files" ]]; then + echo -n $"$prog is already running. Use 'restart'." + failure + echo + return 1 + fi + + $CELERYD_MULTI start $CELERYD_NODES $DAEMON_OPTS \ + --pidfile="$CELERYD_PID_FILE" \ + --logfile="$CELERYD_LOG_FILE" \ + --loglevel="$CELERYD_LOG_LEVEL" \ + --cmd="$CELERYD" \ + --quiet \ + $CELERYD_OPTS + + if [[ "$?" == "0" ]]; then + # Sleep a few seconds to give Celery a chance to initialize itself. + # This is useful to prevent scripts following this one from trying to + # use Celery (or its pid files) too early. + sleep $SLEEP_SECONDS + pid_files=$(_get_pid_files) + if [[ "$pid_files" ]]; then + for pid_file in $pid_files; do + local node=$(basename "$pid_file" .pid) + local pid=$(cat "$pid_file") + echo + echo -n " $node (pid $pid):" + success + done + echo + return 0 + else # celeryd_multi succeeded but no pid files found + failure + fi + else # celeryd_multi did not succeed + failure + fi + echo + return 1 +} + +check_status() { + local pid_files=$(_get_pid_files) + [[ -z "$pid_files" ]] && echo "$prog is stopped" && return 1 + for pid_file in $pid_files; do + local node=$(basename "$pid_file" .pid) + status -p "$pid_file" $"$prog (node $node)" || return 1 # if one node is down celeryd is down + done + return 0 +} + +case "$1" in + start) + check_dev_null + check_paths + start + ;; + + stop) + check_dev_null + check_paths + stop + ;; + + status) + check_status + ;; + + restart) + check_dev_null + check_paths + stop && start + ;; + + *) + echo "Usage: /etc/init.d/$prog {start|stop|restart|status}" + exit 3 + ;; +esac + +exit $? 
diff --git a/extra/centos/celeryd.sysconfig b/extra/centos/celeryd.sysconfig new file mode 100644 index 0000000..e1d98bd --- /dev/null +++ b/extra/centos/celeryd.sysconfig @@ -0,0 +1,24 @@ +# In CentOS, contents should be placed in the file /etc/sysconfig/celeryd + +# Names of nodes to start (space-separated) +#CELERYD_NODES="my_application-node_1" + +# Where to chdir at start. This could be the root of a virtualenv. +#CELERYD_CHDIR="/path/to/my_application" + +# How to call celeryd-multi +#CELERYD_MULTI="$CELERYD_CHDIR/bin/celeryd-multi" + +# Extra arguments +#CELERYD_OPTS="--app=my_application.path.to.worker --time-limit=300 --concurrency=8 --loglevel=DEBUG" + +# Create log/pid dirs, if they don't already exist +#CELERY_CREATE_DIRS=1 + +# %n will be replaced with the nodename +#CELERYD_LOG_FILE="/path/to/my_application/log/%n.log" +#CELERYD_PID_FILE="/var/run/celery/%n.pid" + +# Workers run as an unprivileged user +#CELERYD_USER=celery +#CELERYD_GROUP=celery diff --git a/extra/centos/test_celeryd.sh b/extra/centos/test_celeryd.sh new file mode 100755 index 0000000..a331c2c --- /dev/null +++ b/extra/centos/test_celeryd.sh @@ -0,0 +1,41 @@ +#!/bin/sh + +# If you make changes to the celeryd init script, +# you can use this test script to verify you didn't break the universe + +SERVICE="celeryd" +SERVICE_CMD="sudo /sbin/service $SERVICE" + +run_test() { + local msg="$1" + local cmd="$2" + local expected_retval="${3:-0}" + local n=${#msg} + + echo + echo `printf "%$((${n}+4))s" | tr " " "#"` + echo "# $msg #" + echo `printf "%$((${n}+4))s" | tr " " "#"` + + $cmd + local retval=$? + if [[ "$retval" == "$expected_retval" ]]; then + echo "[PASSED]" + else + echo "[FAILED]" + echo "Exit status: $retval, but expected: $expected_retval" + exit $retval + fi +} + +run_test "stop should succeed" "$SERVICE_CMD stop" 0 +run_test "status on a stopped service should return 1" "$SERVICE_CMD status" 1 +run_test "stopping a stopped celery should not fail" "$SERVICE_CMD stop" 0 +run_test "start should succeed" "$SERVICE_CMD start" 0 +run_test "status on a running service should return 0" "$SERVICE_CMD status" 0 +run_test "starting a running service should fail" "$SERVICE_CMD start" 1 +run_test "restarting a running service should succeed" "$SERVICE_CMD restart" 0 +run_test "status on a restarted service should return 0" "$SERVICE_CMD status" 0 +run_test "stop should succeed" "$SERVICE_CMD stop" 0 + +echo "All tests passed!" diff --git a/extra/generic-init.d/celerybeat b/extra/generic-init.d/celerybeat new file mode 100755 index 0000000..46b73c3 --- /dev/null +++ b/extra/generic-init.d/celerybeat @@ -0,0 +1,309 @@ +#!/bin/bash +# ========================================================= +# celerybeat - Starts the Celery periodic task scheduler. +# ========================================================= +# +# :Usage: /etc/init.d/celerybeat {start|stop|force-reload|restart|try-restart|status} +# :Configuration file: /etc/default/celerybeat or /etc/default/celeryd +# +# See http://docs.celeryproject.org/en/latest/tutorials/daemonizing.html#generic-init-scripts + +### BEGIN INIT INFO +# Provides: celerybeat +# Required-Start: $network $local_fs $remote_fs +# Required-Stop: $network $local_fs $remote_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: celery periodic task scheduler +# Description: Controls the Celery periodic task scheduler. +### END INIT INFO + +# Cannot use set -e/bash -e since the kill -0 command will abort +# abnormally in the absence of a valid process ID. +#set -e + +. 
/lib/lsb/init-functions + +VERSION=10.0 +echo "celery init v${VERSION}." + +if [ $(id -u) -ne 0 ]; then + echo "Error: This program can only be used by the root user." + echo " Unpriviliged users must use 'celery beat --detach'" + exit 1 +fi + + +# May be a runlevel symlink (e.g. S02celeryd) +if [ -L "$0" ]; then + SCRIPT_FILE=$(readlink "$0") +else + SCRIPT_FILE="$0" +fi +SCRIPT_NAME="$(basename "$SCRIPT_FILE")" + +# /etc/init.d/celerybeat: start and stop the celery periodic task scheduler daemon. + +# Make sure executable configuration script is owned by root +_config_sanity() { + local path="$1" + local owner=$(ls -ld "$path" | awk '{print $3}') + local iwgrp=$(ls -ld "$path" | cut -b 6) + local iwoth=$(ls -ld "$path" | cut -b 9) + + if [ "$(id -u $owner)" != "0" ]; then + echo "Error: Config script '$path' must be owned by root!" + echo + echo "Resolution:" + echo "Review the file carefully and make sure it has not been " + echo "modified with mailicious intent. When sure the " + echo "script is safe to execute with superuser privileges " + echo "you can change ownership of the script:" + echo " $ sudo chown root '$path'" + exit 1 + fi + + if [ "$iwoth" != "-" ]; then # S_IWOTH + echo "Error: Config script '$path' cannot be writable by others!" + echo + echo "Resolution:" + echo "Review the file carefully and make sure it has not been " + echo "modified with malicious intent. When sure the " + echo "script is safe to execute with superuser privileges " + echo "you can change the scripts permissions:" + echo " $ sudo chmod 640 '$path'" + exit 1 + fi + if [ "$iwgrp" != "-" ]; then # S_IWGRP + echo "Error: Config script '$path' cannot be writable by group!" + echo + echo "Resolution:" + echo "Review the file carefully and make sure it has not been " + echo "modified with malicious intent. When sure the " + echo "script is safe to execute with superuser privileges " + echo "you can change the scripts permissions:" + echo " $ sudo chmod 640 '$path'" + exit 1 + fi +} + +scripts="" + + +if test -f /etc/default/celeryd; then + scripts="/etc/default/celeryd" + _config_sanity /etc/default/celeryd + . /etc/default/celeryd +fi + +EXTRA_CONFIG="/etc/default/${SCRIPT_NAME}" +if test -f "$EXTRA_CONFIG"; then + scripts="$scripts, $EXTRA_CONFIG" + _config_sanity "$EXTRA_CONFIG" + . "$EXTRA_CONFIG" +fi + +echo "Using configuration: $scripts" + +CELERY_BIN=${CELERY_BIN:-"celery"} +DEFAULT_USER="celery" +DEFAULT_PID_FILE="/var/run/celery/beat.pid" +DEFAULT_LOG_FILE="/var/log/celery/beat.log" +DEFAULT_LOG_LEVEL="INFO" +DEFAULT_CELERYBEAT="$CELERY_BIN beat" +DEFAULT_ENABLED="false" + +ENABLED=${ENABLED:-$DEFAULT_ENABLED} +if [ "$ENABLED" != "true" ]; then + echo "celerybeat daemon disabled - see $scripts." + exit 0 +fi + +CELERYBEAT=${CELERYBEAT:-$DEFAULT_CELERYBEAT} +CELERYBEAT_LOG_LEVEL=${CELERYBEAT_LOG_LEVEL:-${CELERYBEAT_LOGLEVEL:-$DEFAULT_LOG_LEVEL}} + +# Sets --app argument for CELERY_BIN +CELERY_APP_ARG="" +if [ ! -z "$CELERY_APP" ]; then + CELERY_APP_ARG="--app=$CELERY_APP" +fi + +CELERYBEAT_USER=${CELERYBEAT_USER:-${CELERYD_USER:-$DEFAULT_USER}} + +# Set CELERY_CREATE_DIRS to always create log/pid dirs. 
+CELERY_CREATE_DIRS=${CELERY_CREATE_DIRS:-0} +CELERY_CREATE_RUNDIR=$CELERY_CREATE_DIRS +CELERY_CREATE_LOGDIR=$CELERY_CREATE_DIRS +if [ -z "$CELERYBEAT_PID_FILE" ]; then + CELERYBEAT_PID_FILE="$DEFAULT_PID_FILE" + CELERY_CREATE_RUNDIR=1 +fi +if [ -z "$CELERYBEAT_LOG_FILE" ]; then + CELERYBEAT_LOG_FILE="$DEFAULT_LOG_FILE" + CELERY_CREATE_LOGDIR=1 +fi + +export CELERY_LOADER + +CELERYBEAT_OPTS="$CELERYBEAT_OPTS -f $CELERYBEAT_LOG_FILE -l $CELERYBEAT_LOG_LEVEL" + +if [ -n "$2" ]; then + CELERYBEAT_OPTS="$CELERYBEAT_OPTS $2" +fi + +CELERYBEAT_LOG_DIR=`dirname $CELERYBEAT_LOG_FILE` +CELERYBEAT_PID_DIR=`dirname $CELERYBEAT_PID_FILE` + +# Extra start-stop-daemon options, like user/group. + +CELERYBEAT_CHDIR=${CELERYBEAT_CHDIR:-$CELERYD_CHDIR} +if [ -n "$CELERYBEAT_CHDIR" ]; then + DAEMON_OPTS="$DAEMON_OPTS --workdir=$CELERYBEAT_CHDIR" +fi + + +export PATH="${PATH:+$PATH:}/usr/sbin:/sbin" + +check_dev_null() { + if [ ! -c /dev/null ]; then + echo "/dev/null is not a character device!" + exit 75 # EX_TEMPFAIL + fi +} + +maybe_die() { + if [ $? -ne 0 ]; then + echo "Exiting: $*" + exit 77 # EX_NOPERM + fi +} + +create_default_dir() { + if [ ! -d "$1" ]; then + echo "- Creating default directory: '$1'" + mkdir -p "$1" + maybe_die "Couldn't create directory $1" + echo "- Changing permissions of '$1' to 02755" + chmod 02755 "$1" + maybe_die "Couldn't change permissions for $1" + if [ -n "$CELERYBEAT_USER" ]; then + echo "- Changing owner of '$1' to '$CELERYBEAT_USER'" + chown "$CELERYBEAT_USER" "$1" + maybe_die "Couldn't change owner of $1" + fi + if [ -n "$CELERYBEAT_GROUP" ]; then + echo "- Changing group of '$1' to '$CELERYBEAT_GROUP'" + chgrp "$CELERYBEAT_GROUP" "$1" + maybe_die "Couldn't change group of $1" + fi + fi +} + +check_paths() { + if [ $CELERY_CREATE_LOGDIR -eq 1 ]; then + create_default_dir "$CELERYBEAT_LOG_DIR" + fi + if [ $CELERY_CREATE_RUNDIR -eq 1 ]; then + create_default_dir "$CELERYBEAT_PID_DIR" + fi +} + + +create_paths () { + create_default_dir "$CELERYBEAT_LOG_DIR" + create_default_dir "$CELERYBEAT_PID_DIR" +} + + +wait_pid () { + pid=$1 + forever=1 + i=0 + while [ $forever -gt 0 ]; do + kill -0 $pid 1>/dev/null 2>&1 + if [ $? -eq 1 ]; then + echo "OK" + forever=0 + else + kill -TERM "$pid" + i=$((i + 1)) + if [ $i -gt 60 ]; then + echo "ERROR" + echo "Timed out while stopping (30s)" + forever=0 + else + sleep 0.5 + fi + fi + done +} + + +stop_beat () { + echo -n "Stopping ${SCRIPT_NAME}... " + if [ -f "$CELERYBEAT_PID_FILE" ]; then + wait_pid $(cat "$CELERYBEAT_PID_FILE") + else + echo "NOT RUNNING" + fi +} + +_chuid () { + su "$CELERYBEAT_USER" -c "$CELERYBEAT $*" +} + +start_beat () { + echo "Starting ${SCRIPT_NAME}..." + _chuid $CELERY_APP_ARG $CELERYBEAT_OPTS $DAEMON_OPTS --detach \ + --pidfile="$CELERYBEAT_PID_FILE" +} + +status () { + pid=$(cat "$CELERYBEAT_PID_FILE") + kill -0 $pid 1>/dev/null 2>&1 + if [ $? 
-eq 0 ]; then + echo "celerybeat running" + else + echo "celerybeat not running" + exit 1 + fi +} + +case "$1" in + start) + check_dev_null + check_paths + start_beat + ;; + stop) + check_paths + stop_beat + ;; + reload|force-reload) + echo "Use start+stop" + ;; + restart) + echo "Restarting celery periodic task scheduler" + check_paths + stop_beat + check_dev_null + start_beat + ;; + create-paths) + check_dev_null + create_paths + ;; + check-paths) + check_dev_null + check_paths + ;; + status) + status + ;; + *) + echo "Usage: /etc/init.d/${SCRIPT_NAME} {start|stop|restart|create-paths|status}" + exit 64 # EX_USAGE + ;; +esac + +exit 0 diff --git a/extra/generic-init.d/celeryd b/extra/generic-init.d/celeryd new file mode 100755 index 0000000..a9d0c27 --- /dev/null +++ b/extra/generic-init.d/celeryd @@ -0,0 +1,398 @@ +#!/bin/sh -e +# ============================================ +# celeryd - Starts the Celery worker daemon. +# ============================================ +# +# :Usage: /etc/init.d/celeryd {start|stop|force-reload|restart|try-restart|status} +# :Configuration file: /etc/default/celeryd +# +# See http://docs.celeryproject.org/en/latest/tutorials/daemonizing.html#generic-init-scripts + + +### BEGIN INIT INFO +# Provides: celeryd +# Required-Start: $network $local_fs $remote_fs +# Required-Stop: $network $local_fs $remote_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: celery task worker daemon +# Description: Controls a celery task worker daemon instance +### END INIT INFO +# +# +# To implement separate init scripts, copy this script and give it a different +# name: +# I.e., if my new application, "little-worker" needs an init, I +# should just use: +# +# cp /etc/init.d/celeryd /etc/init.d/little-worker +# +# You can then configure this by manipulating /etc/default/little-worker. +# + +. /lib/lsb/init-functions + +VERSION=10.0 +echo "celery init v${VERSION}." +if [ $(id -u) -ne 0 ]; then + echo "Error: This program can only be used by the root user." + echo " Unprivileged users must use the 'celery multi' utility, " + echo " or 'celery worker --detach'." + exit 1 +fi + + +# Can be a runlevel symlink (e.g. S02celeryd) +if [ -L "$0" ]; then + SCRIPT_FILE=$(readlink "$0") +else + SCRIPT_FILE="$0" +fi +SCRIPT_NAME="$(basename "$SCRIPT_FILE")" + +DEFAULT_USER="celery" +DEFAULT_PID_FILE="/var/run/celery/%n.pid" +DEFAULT_LOG_FILE="/var/log/celery/%n.log" +DEFAULT_LOG_LEVEL="INFO" +DEFAULT_NODES="celery" +DEFAULT_CELERYD="-m celery worker --detach" +DEFAULT_ENABLED="false" + +CELERY_DEFAULTS=${CELERY_DEFAULTS:-"/etc/default/${SCRIPT_NAME}"} + +# Make sure executable configuration script is owned by root +_config_sanity() { + local path="$1" + local owner=$(ls -ld "$path" | awk '{print $3}') + local iwgrp=$(ls -ld "$path" | cut -b 6) + local iwoth=$(ls -ld "$path" | cut -b 9) + + if [ "$(id -u $owner)" != "0" ]; then + echo "Error: Config script '$path' must be owned by root!" + echo + echo "Resolution:" + echo "Review the file carefully and make sure it has not been " + echo "modified with mailicious intent. When sure the " + echo "script is safe to execute with superuser privileges " + echo "you can change ownership of the script:" + echo " $ sudo chown root '$path'" + exit 1 + fi + + if [ "$iwoth" != "-" ]; then # S_IWOTH + echo "Error: Config script '$path' cannot be writable by others!" + echo + echo "Resolution:" + echo "Review the file carefully and make sure it has not been " + echo "modified with malicious intent. 
When sure the " + echo "script is safe to execute with superuser privileges " + echo "you can change the scripts permissions:" + echo " $ sudo chmod 640 '$path'" + exit 1 + fi + if [ "$iwgrp" != "-" ]; then # S_IWGRP + echo "Error: Config script '$path' cannot be writable by group!" + echo + echo "Resolution:" + echo "Review the file carefully and make sure it has not been " + echo "modified with malicious intent. When sure the " + echo "script is safe to execute with superuser privileges " + echo "you can change the scripts permissions:" + echo " $ sudo chmod 640 '$path'" + exit 1 + fi +} + +if [ -f "$CELERY_DEFAULTS" ]; then + _config_sanity "$CELERY_DEFAULTS" + echo "Using config script: $CELERY_DEFAULTS" + . "$CELERY_DEFAULTS" +fi + +ENABLED=${ENABLED:-$DEFAULT_ENABLED} +if [ "$ENABLED" != "true" ]; then + echo "celery daemon disabled - see $CELERY_DEFAULTS" + exit 0 +fi + +# Sets --app argument for CELERY_BIN +CELERY_APP_ARG="" +if [ ! -z "$CELERY_APP" ]; then + CELERY_APP_ARG="--app=$CELERY_APP" +fi + +CELERYD_USER=${CELERYD_USER:-$DEFAULT_USER} + +# Set CELERY_CREATE_DIRS to always create log/pid dirs. +CELERY_CREATE_DIRS=${CELERY_CREATE_DIRS:-0} +CELERY_CREATE_RUNDIR=$CELERY_CREATE_DIRS +CELERY_CREATE_LOGDIR=$CELERY_CREATE_DIRS +if [ -z "$CELERYD_PID_FILE" ]; then + CELERYD_PID_FILE="$DEFAULT_PID_FILE" + CELERY_CREATE_RUNDIR=1 +fi +if [ -z "$CELERYD_LOG_FILE" ]; then + CELERYD_LOG_FILE="$DEFAULT_LOG_FILE" + CELERY_CREATE_LOGDIR=1 +fi + +CELERYD_LOG_LEVEL=${CELERYD_LOG_LEVEL:-${CELERYD_LOGLEVEL:-$DEFAULT_LOG_LEVEL}} +CELERY_BIN=${CELERY_BIN:-"celery"} +CELERYD_MULTI=${CELERYD_MULTI:-"$CELERY_BIN multi"} +CELERYD_NODES=${CELERYD_NODES:-$DEFAULT_NODES} + +export CELERY_LOADER + +if [ -n "$2" ]; then + CELERYD_OPTS="$CELERYD_OPTS $2" +fi + +CELERYD_LOG_DIR=`dirname $CELERYD_LOG_FILE` +CELERYD_PID_DIR=`dirname $CELERYD_PID_FILE` + +# Extra start-stop-daemon options, like user/group. +if [ -n "$CELERYD_CHDIR" ]; then + DAEMON_OPTS="$DAEMON_OPTS --workdir=$CELERYD_CHDIR" +fi + + +check_dev_null() { + if [ ! -c /dev/null ]; then + echo "/dev/null is not a character device!" + exit 75 # EX_TEMPFAIL + fi +} + + +maybe_die() { + if [ $? -ne 0 ]; then + echo "Exiting: $* (errno $?)" + exit 77 # EX_NOPERM + fi +} + +create_default_dir() { + if [ ! 
-d "$1" ]; then + echo "- Creating default directory: '$1'" + mkdir -p "$1" + maybe_die "Couldn't create directory $1" + echo "- Changing permissions of '$1' to 02755" + chmod 02755 "$1" + maybe_die "Couldn't change permissions for $1" + if [ -n "$CELERYD_USER" ]; then + echo "- Changing owner of '$1' to '$CELERYD_USER'" + chown "$CELERYD_USER" "$1" + maybe_die "Couldn't change owner of $1" + fi + if [ -n "$CELERYD_GROUP" ]; then + echo "- Changing group of '$1' to '$CELERYD_GROUP'" + chgrp "$CELERYD_GROUP" "$1" + maybe_die "Couldn't change group of $1" + fi + fi +} + + +check_paths() { + if [ $CELERY_CREATE_LOGDIR -eq 1 ]; then + create_default_dir "$CELERYD_LOG_DIR" + fi + if [ $CELERY_CREATE_RUNDIR -eq 1 ]; then + create_default_dir "$CELERYD_PID_DIR" + fi +} + +create_paths() { + create_default_dir "$CELERYD_LOG_DIR" + create_default_dir "$CELERYD_PID_DIR" +} + +export PATH="${PATH:+$PATH:}/usr/sbin:/sbin" + + +_get_pids() { + found_pids=0 + my_exitcode=0 + + for pid_file in "$CELERYD_PID_DIR"/*.pid; do + local pid=`cat "$pid_file"` + local cleaned_pid=`echo "$pid" | sed -e 's/[^0-9]//g'` + if [ -z "$pid" ] || [ "$cleaned_pid" != "$pid" ]; then + echo "bad pid file ($pid_file)" + one_failed=true + my_exitcode=1 + else + found_pids=1 + echo "$pid" + fi + + if [ $found_pids -eq 0 ]; then + echo "${SCRIPT_NAME}: All nodes down" + exit $my_exitcode + fi + done +} + + +_chuid () { + su "$CELERYD_USER" -c "$CELERYD_MULTI $*" +} + + +start_workers () { + if [ ! -z "$CELERYD_ULIMIT" ]; then + ulimit $CELERYD_ULIMIT + fi + _chuid $* start $CELERYD_NODES $DAEMON_OPTS \ + --pidfile="$CELERYD_PID_FILE" \ + --logfile="$CELERYD_LOG_FILE" \ + --loglevel="$CELERYD_LOG_LEVEL" \ + $CELERY_APP_ARG \ + $CELERYD_OPTS +} + + +dryrun () { + (C_FAKEFORK=1 start_workers --verbose) +} + + +stop_workers () { + _chuid stopwait $CELERYD_NODES --pidfile="$CELERYD_PID_FILE" +} + + +restart_workers () { + _chuid restart $CELERYD_NODES $DAEMON_OPTS \ + --pidfile="$CELERYD_PID_FILE" \ + --logfile="$CELERYD_LOG_FILE" \ + --loglevel="$CELERYD_LOG_LEVEL" \ + $CELERY_APP_ARG \ + $CELERYD_OPTS +} + + +kill_workers() { + _chuid kill $CELERYD_NODES --pidfile="$CELERYD_PID_FILE" +} + + +restart_workers_graceful () { + local worker_pids= + worker_pids=`_get_pids` + [ "$one_failed" ] && exit 1 + + for worker_pid in $worker_pids; do + local failed= + kill -HUP $worker_pid 2> /dev/null || failed=true + if [ "$failed" ]; then + echo "${SCRIPT_NAME} worker (pid $worker_pid) could not be restarted" + one_failed=true + else + echo "${SCRIPT_NAME} worker (pid $worker_pid) received SIGHUP" + fi + done + + [ "$one_failed" ] && exit 1 || exit 0 +} + + +check_status () { + my_exitcode=0 + found_pids=0 + + local one_failed= + for pid_file in "$CELERYD_PID_DIR"/*.pid; do + if [ ! -r $pid_file ]; then + echo "${SCRIPT_NAME} is stopped: no pids were found" + one_failed=true + break + fi + + local node=`basename "$pid_file" .pid` + local pid=`cat "$pid_file"` + local cleaned_pid=`echo "$pid" | sed -e 's/[^0-9]//g'` + if [ -z "$pid" ] || [ "$cleaned_pid" != "$pid" ]; then + echo "bad pid file ($pid_file)" + one_failed=true + else + local failed= + kill -0 $pid 2> /dev/null || failed=true + if [ "$failed" ]; then + echo "${SCRIPT_NAME} (node $node) (pid $pid) is stopped, but pid file exists!" + one_failed=true + else + echo "${SCRIPT_NAME} (node $node) (pid $pid) is running..." 
+ fi + fi + done + + [ "$one_failed" ] && exit 1 || exit 0 +} + + +case "$1" in + start) + check_dev_null + check_paths + start_workers + ;; + + stop) + check_dev_null + check_paths + stop_workers + ;; + + reload|force-reload) + echo "Use restart" + ;; + + status) + check_status + ;; + + restart) + check_dev_null + check_paths + restart_workers + ;; + + graceful) + check_dev_null + restart_workers_graceful + ;; + + kill) + check_dev_null + kill_workers + ;; + + dryrun) + check_dev_null + dryrun + ;; + + try-restart) + check_dev_null + check_paths + restart_workers + ;; + + create-paths) + check_dev_null + create_paths + ;; + + check-paths) + check_dev_null + check_paths + ;; + + *) + echo "Usage: /etc/init.d/${SCRIPT_NAME} {start|stop|restart|graceful|kill|dryrun|create-paths}" + exit 64 # EX_USAGE + ;; +esac + +exit 0 diff --git a/extra/osx/org.celeryq.beat.plist b/extra/osx/org.celeryq.beat.plist new file mode 100644 index 0000000..4ad0d68 --- /dev/null +++ b/extra/osx/org.celeryq.beat.plist @@ -0,0 +1,29 @@ + + + + + Disabled + + GroupName + celery-beat + KeepAlive + + Label + org.celeryq.beat + Program + celery + ProgramArguments + + beat + --loglevel=WARNING + + RunAtLoad + + Umask + 7 + UserName + nobody + WorkingDirectory + / + + diff --git a/extra/osx/org.celeryq.worker.plist b/extra/osx/org.celeryq.worker.plist new file mode 100644 index 0000000..ea1adb1 --- /dev/null +++ b/extra/osx/org.celeryq.worker.plist @@ -0,0 +1,29 @@ + + + + + Disabled + + GroupName + celery-worker + KeepAlive + + Label + org.celeryq.worker + Program + celery + ProgramArguments + + worker + --loglevel=WARNING + + RunAtLoad + + Umask + 7 + UserName + nobody + WorkingDirectory + / + + diff --git a/extra/supervisord/celerybeat.conf b/extra/supervisord/celerybeat.conf new file mode 100644 index 0000000..e25c371 --- /dev/null +++ b/extra/supervisord/celerybeat.conf @@ -0,0 +1,22 @@ +; ================================ +; celery beat supervisor example +; ================================ + +[program:celerybeat] +; Set full path to celery program if using virtualenv +command=celery beat -A myapp --schedule /var/lib/celery/beat.db --loglevel=INFO + +; remove the -A myapp argument if you are not using an app instance + +directory=/path/to/project +user=nobody +numprocs=1 +stdout_logfile=/var/log/celery/beat.log +stderr_logfile=/var/log/celery/beat.log +autostart=true +autorestart=true +startsecs=10 + +; if rabbitmq is supervised, set its priority higher +; so it starts first +priority=999 diff --git a/extra/supervisord/celeryd.conf b/extra/supervisord/celeryd.conf new file mode 100644 index 0000000..f922937 --- /dev/null +++ b/extra/supervisord/celeryd.conf @@ -0,0 +1,29 @@ +; ================================== +; celery worker supervisor example +; ================================== + +[program:celery] +; Set full path to celery program if using virtualenv +command=celery worker -A proj --loglevel=INFO + +directory=/path/to/project +user=nobody +numprocs=1 +stdout_logfile=/var/log/celery/worker.log +stderr_logfile=/var/log/celery/worker.log +autostart=true +autorestart=true +startsecs=10 + +; Need to wait for currently executing tasks to finish at shutdown. +; Increase this if you have very long running tasks. +stopwaitsecs = 600 + +; When resorting to send SIGKILL to the program to terminate it +; send SIGKILL to its whole process group instead, +; taking care of its children as well. 
+killasgroup=true + +; if rabbitmq is supervised, set its priority higher +; so it starts first +priority=998 diff --git a/extra/supervisord/supervisord.conf b/extra/supervisord/supervisord.conf new file mode 100644 index 0000000..26e5fcb --- /dev/null +++ b/extra/supervisord/supervisord.conf @@ -0,0 +1,34 @@ +[unix_http_server] +file=/tmp/supervisor.sock ; path to your socket file + +[supervisord] +logfile=/var/log/supervisord/supervisord.log ; supervisord log file +logfile_maxbytes=50MB ; maximum size of logfile before rotation +logfile_backups=10 ; number of backed up logfiles +loglevel=info ; info, debug, warn, trace +pidfile=/var/run/supervisord.pid ; pidfile location +nodaemon=false ; run supervisord as a daemon +minfds=1024 ; number of startup file descriptors +minprocs=200 ; number of process descriptors +user=root ; default user +childlogdir=/var/log/supervisord/ ; where child log files will live + + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[supervisorctl] +serverurl=unix:///tmp/supervisor.sock ; use unix:// schem for a unix sockets. + + +[include] + +# Uncomment this line for celeryd for Python +;files=celeryd.conf + +# Uncomment this line for celeryd for Django. +;files=django/celeryd.conf + + + + diff --git a/extra/systemd/celery.conf b/extra/systemd/celery.conf new file mode 100644 index 0000000..08b90cf --- /dev/null +++ b/extra/systemd/celery.conf @@ -0,0 +1,13 @@ +# See +# http://docs.celeryproject.org/en/latest/tutorials/daemonizing.html#available-options + +CELERY_APP="proj" +CELERYD_NODES="worker" +CELERYD_OPTS="" +CELERY_BIN="/usr/bin/python2 -m celery" +CELERYD_PID_FILE="/var/run/celery/%n.pid" +CELERYD_LOG_FILE="/var/log/celery/%n.log" +CELERYD_LOG_LEVEL="INFO" + +d /run/celery 0755 user users - +d /var/log/celery 0755 user users - diff --git a/extra/systemd/celery.service b/extra/systemd/celery.service new file mode 100644 index 0000000..5729d29 --- /dev/null +++ b/extra/systemd/celery.service @@ -0,0 +1,23 @@ +[Unit] +Description=Celery workers +After=network.target + +[Service] +Type=forking +User=user +Group=users +EnvironmentFile=-/etc/conf.d/celery +WorkingDirectory=/opt/Myproject/ +ExecStart=${CELERY_BIN} multi start $CELERYD_NODES \ + -A $CELERY_APP --pidfile=${CELERYD_PID_FILE} \ + --logfile=${CELERYD_LOG_FILE} --loglevel="${CELERYD_LOG_LEVEL}" \ + $CELERYD_OPTS +ExecStop=${CELERY_BIN} multi stopwait $CELERYD_NODES \ + --pidfile=${CELERYD_PID_FILE} +ExecReload=${CELERY_BIN} multi restart $CELERYD_NODES \ + -A ${CELERY_APP} --pidfile=${CELERYD_PID_FILE} \ + --logfile=${CELERYD_LOG_FILE} --loglevel="${CELERYD_LOG_LEVEL}" \ + $CELERYD_OPTS + +[Install] +WantedBy=multi-user.target diff --git a/extra/zsh-completion/celery.zsh b/extra/zsh-completion/celery.zsh new file mode 100644 index 0000000..ff1856a --- /dev/null +++ b/extra/zsh-completion/celery.zsh @@ -0,0 +1,134 @@ +# This is a zsh completion script for Celery +# It has to be installed as follows: +# +# Alternative A) Copy the script to your zsh site-functions directory (often +# ``/usr/share/zsh/site-functions``) and name the script ``_celery`` +# +# Alternative B). Or, use this file as a oh-my-zsh plugin (rename the script +# to ``_celery``), and add it to .zshrc e.g. 
plugins=(celery git osx ruby) +# + +_celery () { +local -a _1st_arguments ifargs dopts controlargs + +typeset -A opt_args + +_1st_arguments=('worker' 'events' 'beat' 'shell' 'multi' 'amqp' 'status' 'inspect' \ + 'control' 'purge' 'list' 'migrate' 'call' 'result' 'report') +ifargs=('--app=' '--broker=' '--loader=' '--config=' '--version') +dopts=('--detach' '--umask=' '--gid=' '--uid=' '--pidfile=' '--logfile=' '--loglevel=') +controlargs=('--timeout' '--destination') +_arguments \ + '(-A --app=)'{-A,--app}'[app instance to use (e.g. module.attr_name):APP]' \ + '(-b --broker=)'{-b,--broker}'[url to broker. default is "amqp://guest@localhost//":BROKER]' \ + '(--loader)--loader[name of custom loader class to use.:LOADER]' \ + '(--config)--config[Name of the configuration module:CONFIG]' \ + '(--workdir)--workdir[Optional directory to change to after detaching.:WORKING_DIRECTORY]' \ + '(-q --quiet)'{-q,--quiet}'[Don"t show as much output.]' \ + '(-C --no-color)'{-C,--no-color}'[Don"t display colors.]' \ + '(--version)--version[show program"s version number and exit]' \ + '(- : *)'{-h,--help}'[show this help message and exit]' \ + '*:: :->subcmds' && return 0 + +if (( CURRENT == 1 )); then + _describe -t commands "celery subcommand" _1st_arguments + return +fi + +case "$words[1]" in + worker) + _arguments \ + '(-C --concurrency=)'{-C,--concurrency=}'[Number of child processes processing the queue. The default is the number of CPUs.]' \ + '(--pool)--pool=:::(prefork eventlet gevent threads solo)' \ + '(--purge --discard)'{--discard,--purge}'[Purges all waiting tasks before the daemon is started.]' \ + '(-f --logfile=)'{-f,--logfile=}'[Path to log file. If no logfile is specified, stderr is used.]' \ + '(--loglevel=)--loglevel=:::(critical error warning info debug)' \ + '(-N --hostname=)'{-N,--hostname=}'[Set custom hostname, e.g. "foo@example.com".]' \ + '(-B --beat)'{-B,--beat}'[Also run the celerybeat periodic task scheduler.]' \ + '(-s --schedule=)'{-s,--schedule=}'[Path to the schedule database if running with the -B option. Defaults to celerybeat-schedule.]' \ + '(-S --statedb=)'{-S,--statedb=}'[Path to the state database.Default: None]' \ + '(-E --events)'{-E,--events}'[Send events that can be captured by monitors like celeryev, celerymon, and others.]' \ + '(--time-limit=)--time-limit=[nables a hard time limit (in seconds int/float) for tasks]' \ + '(--soft-time-limit=)--soft-time-limit=[Enables a soft time limit (in seconds int/float) for tasks]' \ + '(--maxtasksperchild=)--maxtasksperchild=[Maximum number of tasks a pool worker can execute before it"s terminated and replaced by a new worker.]' \ + '(-Q --queues=)'{-Q,--queues=}'[List of queues to enable for this worker, separated by comma. 
By default all configured queues are enabled.]' \ + '(-I --include=)'{-I,--include=}'[Comma separated list of additional modules to import.]' \ + '(--pidfile=)--pidfile=[Optional file used to store the process pid.]' \ + '(--autoscale=)--autoscale=[Enable autoscaling by providing max_concurrency, min_concurrency.]' \ + '(--autoreload)--autoreload[Enable autoreloading.]' \ + '(--no-execv)--no-execv[Don"t do execv after multiprocessing child fork.]' + compadd -a ifargs + ;; + inspect) + _values -s \ + 'active[dump active tasks (being processed)]' \ + 'active_queues[dump queues being consumed from]' \ + 'ping[ping worker(s)]' \ + 'registered[dump of registered tasks]' \ + 'report[get bugreport info]' \ + 'reserved[dump reserved tasks (waiting to be processed)]' \ + 'revoked[dump of revoked task ids]' \ + 'scheduled[dump scheduled tasks (eta/countdown/retry)]' \ + 'stats[dump worker statistics]' + compadd -a controlargs ifargs + ;; + control) + _values -s \ + 'add_consumer[tell worker(s) to start consuming a queue]' \ + 'autoscale[change autoscale settings]' \ + 'cancel_consumer[tell worker(s) to stop consuming a queue]' \ + 'disable_events[tell worker(s) to disable events]' \ + 'enable_events[tell worker(s) to enable events]' \ + 'pool_grow[start more pool processes]' \ + 'pool_shrink[use less pool processes]' \ + 'rate_limit[tell worker(s) to modify the rate limit for a task type]' \ + 'time_limit[tell worker(s) to modify the time limit for a task type.]' + compadd -a controlargs ifargs + ;; + multi) + _values -s \ + '--nosplash[Don"t display program info.]' \ + '--verbose[Show more output.]' \ + '--no-color[Don"t display colors.]' \ + '--quiet[Don"t show as much output.]' \ + 'start' 'restart' 'stopwait' 'stop' 'show' \ + 'names' 'expand' 'get' 'kill' + compadd -a ifargs + ;; + amqp) + _values -s \ + 'queue.declare' 'queue.purge' 'exchange.delete' 'basic.publish' \ + 'exchange.declare' 'queue.delete' 'queue.bind' 'basic.get' + ;; + list) + _values -s, 'bindings' + ;; + shell) + _values -s \ + '--ipython[force iPython.]' \ + '--bpython[force bpython.]' \ + '--python[force default Python shell.]' \ + '--without-tasks[don"t add tasks to locals.]' \ + '--eventlet[use eventlet.]' \ + '--gevent[use gevent.]' + compadd -a ifargs + ;; + beat) + _arguments \ + '(-s --schedule=)'{-s,--schedule=}'[Path to the schedule database. Defaults to celerybeat-schedule.]' \ + '(-S --scheduler=)'{-S,--scheduler=}'[Scheduler class to use. Default is celery.beat.PersistentScheduler.]' \ + '(--max-interval)--max-interval[]' + compadd -a dopts fargs + ;; + events) + _arguments \ + '(-d --dump)'{-d,--dump}'[Dump events to stdout.]' \ + '(-c --camera=)'{-c,--camera=}'[Take snapshots of events using this camera.]' \ + '(-F --frequency=)'{-F,--frequency=}'[Camera: Shutter frequency. Default is every 1.0 seconds.]' \ + '(-r --maxrate=)'{-r,--maxrate=}'[Camera: Optional shutter rate limit (e.g. 
10/m).]' + compadd -a dopts fargs + ;; + *) + ;; + esac +} diff --git a/requirements/default.txt b/requirements/default.txt new file mode 100644 index 0000000..23406d5 --- /dev/null +++ b/requirements/default.txt @@ -0,0 +1,3 @@ +pytz>dev +billiard>=3.3.0.18,<3.4 +kombu>=3.0.21,<3.1 diff --git a/requirements/dev.txt b/requirements/dev.txt new file mode 100644 index 0000000..5672438 --- /dev/null +++ b/requirements/dev.txt @@ -0,0 +1,3 @@ +https://github.com/celery/py-amqp/zipball/master +https://github.com/celery/billiard/zipball/master +https://github.com/celery/kombu/zipball/master diff --git a/requirements/docs.txt b/requirements/docs.txt new file mode 100644 index 0000000..3854f9e --- /dev/null +++ b/requirements/docs.txt @@ -0,0 +1,2 @@ +Sphinx +SQLAlchemy diff --git a/requirements/extras/auth.txt b/requirements/extras/auth.txt new file mode 100644 index 0000000..8c388fa --- /dev/null +++ b/requirements/extras/auth.txt @@ -0,0 +1 @@ +pyOpenSSL diff --git a/requirements/extras/beanstalk.txt b/requirements/extras/beanstalk.txt new file mode 100644 index 0000000..c62c81b --- /dev/null +++ b/requirements/extras/beanstalk.txt @@ -0,0 +1 @@ +beanstalkc diff --git a/requirements/extras/cassandra.txt b/requirements/extras/cassandra.txt new file mode 100644 index 0000000..a58d089 --- /dev/null +++ b/requirements/extras/cassandra.txt @@ -0,0 +1 @@ +pycassa diff --git a/requirements/extras/couchbase.txt b/requirements/extras/couchbase.txt new file mode 100644 index 0000000..0b3044b --- /dev/null +++ b/requirements/extras/couchbase.txt @@ -0,0 +1 @@ +couchbase diff --git a/requirements/extras/couchdb.txt b/requirements/extras/couchdb.txt new file mode 100644 index 0000000..3e100d4 --- /dev/null +++ b/requirements/extras/couchdb.txt @@ -0,0 +1 @@ +couchdb diff --git a/requirements/extras/eventlet.txt b/requirements/extras/eventlet.txt new file mode 100644 index 0000000..bfe34bc --- /dev/null +++ b/requirements/extras/eventlet.txt @@ -0,0 +1 @@ +eventlet diff --git a/requirements/extras/gevent.txt b/requirements/extras/gevent.txt new file mode 100644 index 0000000..4a63abe --- /dev/null +++ b/requirements/extras/gevent.txt @@ -0,0 +1 @@ +gevent diff --git a/requirements/extras/librabbitmq.txt b/requirements/extras/librabbitmq.txt new file mode 100644 index 0000000..8f9a2db --- /dev/null +++ b/requirements/extras/librabbitmq.txt @@ -0,0 +1 @@ +librabbitmq>=1.5.0 diff --git a/requirements/extras/memcache.txt b/requirements/extras/memcache.txt new file mode 100644 index 0000000..a19a29c --- /dev/null +++ b/requirements/extras/memcache.txt @@ -0,0 +1 @@ +pylibmc diff --git a/requirements/extras/mongodb.txt b/requirements/extras/mongodb.txt new file mode 100644 index 0000000..19e59fe --- /dev/null +++ b/requirements/extras/mongodb.txt @@ -0,0 +1 @@ +pymongo>=2.6.2 diff --git a/requirements/extras/msgpack.txt b/requirements/extras/msgpack.txt new file mode 100644 index 0000000..bf7cb78 --- /dev/null +++ b/requirements/extras/msgpack.txt @@ -0,0 +1 @@ +msgpack-python>=0.3.0 diff --git a/requirements/extras/pyro.txt b/requirements/extras/pyro.txt new file mode 100644 index 0000000..d19b0db --- /dev/null +++ b/requirements/extras/pyro.txt @@ -0,0 +1 @@ +pyro4 diff --git a/requirements/extras/redis.txt b/requirements/extras/redis.txt new file mode 100644 index 0000000..4a645b4 --- /dev/null +++ b/requirements/extras/redis.txt @@ -0,0 +1 @@ +redis>=2.8.0 diff --git a/requirements/extras/slmq.txt b/requirements/extras/slmq.txt new file mode 100644 index 0000000..2f06ed2 --- /dev/null +++ 
b/requirements/extras/slmq.txt @@ -0,0 +1 @@ +softlayer_messaging>=1.0.3 diff --git a/requirements/extras/sqlalchemy.txt b/requirements/extras/sqlalchemy.txt new file mode 100644 index 0000000..39fb2be --- /dev/null +++ b/requirements/extras/sqlalchemy.txt @@ -0,0 +1 @@ +sqlalchemy diff --git a/requirements/extras/sqs.txt b/requirements/extras/sqs.txt new file mode 100644 index 0000000..66b9583 --- /dev/null +++ b/requirements/extras/sqs.txt @@ -0,0 +1 @@ +boto>=2.13.3 diff --git a/requirements/extras/threads.txt b/requirements/extras/threads.txt new file mode 100644 index 0000000..c88d74e --- /dev/null +++ b/requirements/extras/threads.txt @@ -0,0 +1 @@ +threadpool diff --git a/requirements/extras/yaml.txt b/requirements/extras/yaml.txt new file mode 100644 index 0000000..17bf7fd --- /dev/null +++ b/requirements/extras/yaml.txt @@ -0,0 +1 @@ +PyYAML>=3.10 diff --git a/requirements/extras/zeromq.txt b/requirements/extras/zeromq.txt new file mode 100644 index 0000000..d34ee10 --- /dev/null +++ b/requirements/extras/zeromq.txt @@ -0,0 +1 @@ +pyzmq>=13.1.0 diff --git a/requirements/extras/zookeeper.txt b/requirements/extras/zookeeper.txt new file mode 100644 index 0000000..81893ea --- /dev/null +++ b/requirements/extras/zookeeper.txt @@ -0,0 +1 @@ +kazoo>=1.3.1 diff --git a/requirements/jython.txt b/requirements/jython.txt new file mode 100644 index 0000000..4427a9a --- /dev/null +++ b/requirements/jython.txt @@ -0,0 +1,2 @@ +threadpool +multiprocessing diff --git a/requirements/pkgutils.txt b/requirements/pkgutils.txt new file mode 100644 index 0000000..de2162e --- /dev/null +++ b/requirements/pkgutils.txt @@ -0,0 +1,7 @@ +setuptools>=1.3.2 +wheel +paver +flake8 +flakeplus +tox +Sphinx-PyPI-upload diff --git a/requirements/security.txt b/requirements/security.txt new file mode 100644 index 0000000..9292484 --- /dev/null +++ b/requirements/security.txt @@ -0,0 +1 @@ +PyOpenSSL diff --git a/requirements/test-ci.txt b/requirements/test-ci.txt new file mode 100644 index 0000000..92ee759 --- /dev/null +++ b/requirements/test-ci.txt @@ -0,0 +1,6 @@ +coverage>=3.0 +coveralls +redis +#pymongo +#SQLAlchemy +PyOpenSSL diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100644 index 0000000..0d0b3c6 --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1,3 @@ +unittest2>=0.5.1 +nose +mock>=1.0.1 diff --git a/requirements/test3.txt b/requirements/test3.txt new file mode 100644 index 0000000..f7ca6c0 --- /dev/null +++ b/requirements/test3.txt @@ -0,0 +1,3 @@ +nose +# FIXME required by kombu.tests.case +mock >=1.0.1 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..7c238dd --- /dev/null +++ b/setup.cfg @@ -0,0 +1,24 @@ +[nosetests] +where = celery/tests + +[build_sphinx] +source-dir = docs/ +build-dir = docs/.build +all_files = 1 + +[upload_sphinx] +upload-dir = docs/.build/html + +[bdist_rpm] +requires = pytz >= 2011b + billiard >= 3.3.0.18 + kombu >= 3.0.21 + +[wheel] +universal = 1 + +[egg_info] +tag_build = +tag_date = 0 +tag_svn_revision = 0 + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..0352b14 --- /dev/null +++ b/setup.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +try: + from setuptools import setup, find_packages + from setuptools.command.test import test + is_setuptools = True +except ImportError: + raise + from ez_setup import use_setuptools + use_setuptools() + from setuptools import setup, find_packages # noqa + from setuptools.command.test import test # noqa + is_setuptools = False + +import os +import 
sys +import codecs + +CELERY_COMPAT_PROGRAMS = int(os.environ.get('CELERY_COMPAT_PROGRAMS', 1)) + +if sys.version_info < (2, 6): + raise Exception('Celery 3.1 requires Python 2.6 or higher.') + +NAME = 'celery' +entrypoints = {} +extra = {} + +# -*- Classifiers -*- + +classes = """ + Development Status :: 5 - Production/Stable + License :: OSI Approved :: BSD License + Topic :: System :: Distributed Computing + Topic :: Software Development :: Object Brokering + Programming Language :: Python + Programming Language :: Python :: 2 + Programming Language :: Python :: 2.6 + Programming Language :: Python :: 2.7 + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.3 + Programming Language :: Python :: 3.4 + Programming Language :: Python :: Implementation :: CPython + Programming Language :: Python :: Implementation :: PyPy + Programming Language :: Python :: Implementation :: Jython + Operating System :: OS Independent + Operating System :: POSIX + Operating System :: Microsoft :: Windows + Operating System :: MacOS :: MacOS X +""" +classifiers = [s.strip() for s in classes.split('\n') if s] + +PY3 = sys.version_info[0] == 3 +JYTHON = sys.platform.startswith('java') +PYPY = hasattr(sys, 'pypy_version_info') + +# -*- Distribution Meta -*- + +import re +re_meta = re.compile(r'__(\w+?)__\s*=\s*(.*)') +re_vers = re.compile(r'VERSION\s*=.*?\((.*?)\)') +re_doc = re.compile(r'^"""(.+?)"""') +rq = lambda s: s.strip("\"'") + + +def add_default(m): + attr_name, attr_value = m.groups() + return ((attr_name, rq(attr_value)), ) + + +def add_version(m): + v = list(map(rq, m.groups()[0].split(', '))) + return (('VERSION', '.'.join(v[0:3]) + ''.join(v[3:])), ) + + +def add_doc(m): + return (('doc', m.groups()[0]), ) + +pats = {re_meta: add_default, + re_vers: add_version, + re_doc: add_doc} +here = os.path.abspath(os.path.dirname(__file__)) +with open(os.path.join(here, 'celery/__init__.py')) as meta_fh: + meta = {} + for line in meta_fh: + if line.strip() == '# -eof meta-': + break + for pattern, handler in pats.items(): + m = pattern.match(line.strip()) + if m: + meta.update(handler(m)) + +# -*- Installation Requires -*- + +py_version = sys.version_info + + +def strip_comments(l): + return l.split('#', 1)[0].strip() + + +def reqs(*f): + return [ + r for r in ( + strip_comments(l) for l in open( + os.path.join(os.getcwd(), 'requirements', *f)).readlines() + ) if r] + +install_requires = reqs('default.txt') +if JYTHON: + install_requires.extend(reqs('jython.txt')) + +# -*- Tests Requires -*- + +tests_require = reqs('test3.txt' if PY3 else 'test.txt') + +# -*- Long Description -*- + +if os.path.exists('README.rst'): + long_description = codecs.open('README.rst', 'r', 'utf-8').read() +else: + long_description = 'See http://pypi.python.org/pypi/celery' + +# -*- Entry Points -*- # + +console_scripts = entrypoints['console_scripts'] = [ + 'celery = celery.__main__:main', +] + +if CELERY_COMPAT_PROGRAMS: + console_scripts.extend([ + 'celeryd = celery.__main__:_compat_worker', + 'celerybeat = celery.__main__:_compat_beat', + 'celeryd-multi = celery.__main__:_compat_multi', + ]) + +if is_setuptools: + extras = lambda *p: reqs('extras', *p) + # Celery specific + specific_list = ['auth', 'cassandra', 'memcache', 'couchbase', 'threads', + 'eventlet', 'gevent', 'msgpack', 'yaml', 'redis', + 'mongodb', 'sqs', 'couchdb', 'beanstalk', 'zookeeper', + 'zeromq', 'sqlalchemy', 'librabbitmq', 'pyro', 'slmq'] + extras_require = dict((x, extras(x + '.txt')) for x in specific_list) + extra['extras_require'] 
= extras_require + +# -*- %%% -*- + +setup( + name=NAME, + version=meta['VERSION'], + description=meta['doc'], + author=meta['author'], + author_email=meta['contact'], + url=meta['homepage'], + platforms=['any'], + license='BSD', + packages=find_packages(exclude=['ez_setup', 'tests', 'tests.*']), + zip_safe=False, + install_requires=install_requires, + tests_require=tests_require, + test_suite='nose.collector', + classifiers=classifiers, + entry_points=entrypoints, + long_description=long_description, + **extra)
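
The metadata block in setup.py above scans ``celery/__init__.py`` line by line, stops at the ``# -eof meta-`` sentinel, and fills the ``meta`` dict from three regular expressions (``re_meta``, ``re_vers``, ``re_doc``). The following is a minimal, self-contained sketch of that scan; the sample header lines are hypothetical stand-ins, not the real contents of ``celery/__init__.py``::

    import re

    re_meta = re.compile(r'__(\w+?)__\s*=\s*(.*)')
    re_vers = re.compile(r'VERSION\s*=.*?\((.*?)\)')
    re_doc = re.compile(r'^"""(.+?)"""')
    rq = lambda s: s.strip("\"'")

    # Hypothetical module header (stand-in for celery/__init__.py).
    sample_header = [
        '"""Distributed Task Queue"""',
        "VERSION = (3, 1, 0, 'rc1')",
        "__author__ = 'Example Author'",
        "__contact__ = 'author@example.com'",
        "__homepage__ = 'http://example.com/'",
        '# -eof meta-',
    ]

    meta = {}
    for line in sample_header:
        line = line.strip()
        if line == '# -eof meta-':   # same sentinel setup.py stops at
            break
        m = re_doc.match(line)
        if m:                        # module docstring -> meta['doc']
            meta['doc'] = m.groups()[0]
            continue
        m = re_vers.match(line)
        if m:                        # VERSION tuple -> dotted version string
            v = list(map(rq, m.groups()[0].split(', ')))
            meta['VERSION'] = '.'.join(v[0:3]) + ''.join(v[3:])
            continue
        m = re_meta.match(line)
        if m:                        # __author__ etc. -> meta['author'] etc.
            attr_name, attr_value = m.groups()
            meta[attr_name] = rq(attr_value)

    print(meta)
    # {'doc': 'Distributed Task Queue', 'VERSION': '3.1.0rc1',
    #  'author': 'Example Author', 'contact': 'author@example.com',
    #  'homepage': 'http://example.com/'}

This is why setup() can pass ``version=meta['VERSION']`` and ``description=meta['doc']`` without importing the celery package itself, which could fail at install time before the dependencies are available.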
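
Similarly, ``reqs()`` and ``strip_comments()`` in setup.py turn each file under requirements/ into a plain list of requirement specifiers, dropping comments and blank lines; the result feeds ``install_requires``, ``tests_require`` and the ``extras_require`` mapping. A small sketch of the same logic, using a temporary file with made-up contents instead of the real requirements files::

    import os
    import tempfile

    def strip_comments(line):
        # Drop everything after the first '#', then trim whitespace.
        return line.split('#', 1)[0].strip()

    def reqs(path):
        # Non-empty, de-commented lines of a requirements file.
        with open(path) as fh:
            return [r for r in (strip_comments(line) for line in fh) if r]

    # Hypothetical requirements file (the real ones live in requirements/).
    with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as fh:
        fh.write('kombu>=3.0.21,<3.1\n')
        fh.write('# a comment-only line is dropped entirely\n')
        fh.write('billiard>=3.3.0.18,<3.4  # trailing comments are stripped\n')
        path = fh.name

    print(reqs(path))  # ['kombu>=3.0.21,<3.1', 'billiard>=3.3.0.18,<3.4']
    os.unlink(path)

With the ``extras_require`` mapping built from the files in requirements/extras/, an install such as ``pip install celery[redis,msgpack]`` pulls in the corresponding optional dependencies.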