From 3c6c342129b3ffa8097ed369115a2fa9aa30ca13 Mon Sep 17 00:00:00 2001 From: Dustin Broderick Date: Tue, 28 Nov 2017 12:58:57 -0700 Subject: [PATCH 1/9] Removes atomic transaction for buildwatson --- watson/management/commands/buildwatson.py | 1 - 1 file changed, 1 deletion(-) diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py index 59703d2..ce3e4c6 100644 --- a/watson/management/commands/buildwatson.py +++ b/watson/management/commands/buildwatson.py @@ -75,7 +75,6 @@ class Command(BaseCommand): help='Search engine models are registered with' ) - @transaction.atomic() def handle(self, *args, **options): """Runs the management command.""" activate(settings.LANGUAGE_CODE) From ab65d162ee2176209265f424735ef556fd75e1dd Mon Sep 17 00:00:00 2001 From: Dustin Broderick Date: Tue, 28 Nov 2017 14:19:06 -0700 Subject: [PATCH 2/9] Adds batch_size option to buildwatson --- watson/management/commands/buildwatson.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py index ce3e4c6..67e609e 100644 --- a/watson/management/commands/buildwatson.py +++ b/watson/management/commands/buildwatson.py @@ -28,7 +28,7 @@ def get_engine(engine_slug_): raise CommandError("Search Engine \"%s\" is not registered!" % force_text(engine_slug_)) -def rebuild_index_for_model(model_, engine_slug_, verbosity_): +def rebuild_index_for_model(model_, engine_slug_, verbosity_, batch_size_=100): """rebuilds index for a model""" search_engine_ = get_engine(engine_slug_) @@ -74,6 +74,13 @@ class Command(BaseCommand): action="store", help='Search engine models are registered with' ) + parser.add_argument( + '--batch_size', + action='store', + default=100, + type=int, + help="The batchsize with which entries will be added to the index." + ) def handle(self, *args, **options): """Runs the management command.""" @@ -88,6 +95,8 @@ class Command(BaseCommand): engine_slug = "default" engine_selected = False + batch_size = options.get("batch_size") + # work-around for legacy optparser hack in BaseCommand. In Django=1.10 the # args are collected in options['apps'], but in earlier versions they are # kept in args. From 8fd2573747b416c89148b75458997ae4fba4c187 Mon Sep 17 00:00:00 2001 From: Dustin Broderick Date: Tue, 28 Nov 2017 14:22:55 -0700 Subject: [PATCH 3/9] Adds batch_size functionality to buildwatson --- watson/management/commands/buildwatson.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py index 67e609e..d14b9b2 100644 --- a/watson/management/commands/buildwatson.py +++ b/watson/management/commands/buildwatson.py @@ -58,7 +58,7 @@ def rebuild_index_for_model(model_, engine_slug_, verbosity_, batch_size_=100): engine_slug=force_text(engine_slug_), ) ) - _bulk_save_search_entries(iter_search_entries()) + _bulk_save_search_entries(iter_search_entries(), batch_size=batch_size_) return local_refreshed_model_count[0] @@ -131,7 +131,7 @@ class Command(BaseCommand): if verbosity >= 3: print("Using search engine \"%s\"" % engine_slug) for model in models: - refreshed_model_count += rebuild_index_for_model(model, engine_slug, verbosity) + refreshed_model_count += rebuild_index_for_model(model, engine_slug, verbosity, batch_size_=batch_size) else: # full rebuild (for one or all search engines) if engine_selected: @@ -147,7 +147,7 @@ class Command(BaseCommand): registered_models = search_engine.get_registered_models() # Rebuild the index for all registered models. for model in registered_models: - refreshed_model_count += rebuild_index_for_model(model, engine_slug, verbosity) + refreshed_model_count += rebuild_index_for_model(model, engine_slug, verbosity, batch_size_=batch_size) # Clean out any search entries that exist for stale content types. # Only do it during full rebuild From 1d293535b43db3e32bf90232b5e53d0705ca0f85 Mon Sep 17 00:00:00 2001 From: Dustin Broderick Date: Tue, 28 Nov 2017 14:33:58 -0700 Subject: [PATCH 4/9] Adds missing paren --- watson/management/commands/buildwatson.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py index 50d5c76..446e2ae 100644 --- a/watson/management/commands/buildwatson.py +++ b/watson/management/commands/buildwatson.py @@ -88,7 +88,7 @@ class Command(BaseCommand): model registration. WARNING: buildwatson must be rerun if the filter \ changes or the index will be incomplete." ) - parser.add_argument + parser.add_argument( '--batch_size', action='store', default=100, From c5cd52d227dde3d16453d1274f3131adcd1f03dd Mon Sep 17 00:00:00 2001 From: Dustin Broderick Date: Wed, 29 Nov 2017 09:25:44 -0700 Subject: [PATCH 5/9] Addesses linter issues --- watson/management/commands/buildwatson.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py index 446e2ae..b74a109 100644 --- a/watson/management/commands/buildwatson.py +++ b/watson/management/commands/buildwatson.py @@ -81,9 +81,9 @@ class Command(BaseCommand): help='Search engine models are registered with' ) parser.add_argument( - '--slim', - action='store_true', - default=False, + '--slim', + action='store_true', + default=False, help="Only include objects which satisfy the filter specified during \ model registration. WARNING: buildwatson must be rerun if the filter \ changes or the index will be incomplete." From 92020f42f1b17f691f2f3a3071282e55ea73413e Mon Sep 17 00:00:00 2001 From: Dustin Broderick Date: Wed, 6 Dec 2017 09:58:43 -0700 Subject: [PATCH 6/9] Adds non-atomic option to buildwatson --- watson/management/commands/buildwatson.py | 24 +++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py index b74a109..f367473 100644 --- a/watson/management/commands/buildwatson.py +++ b/watson/management/commands/buildwatson.py @@ -88,6 +88,13 @@ class Command(BaseCommand): model registration. WARNING: buildwatson must be rerun if the filter \ changes or the index will be incomplete." ) + parser.add_argument( + '--non-atomic', + action='store_true', + default=False, + help="Commit index entries in batches. WARNING: if buildwatson failse, \ + the index will be incomplete." + ) parser.add_argument( '--batch_size', action='store', @@ -112,6 +119,7 @@ class Command(BaseCommand): # Do we do a partial index and how large should the batches be? slim = options.get("slim") batch_size = options.get("batch_size") + non_atomic = options.get("non-atomic") # work-around for legacy optparser hack in BaseCommand. In Django=1.10 the # args are collected in options['apps'], but in earlier versions they are @@ -147,7 +155,13 @@ class Command(BaseCommand): if verbosity >= 3: print("Using search engine \"%s\"" % engine_slug) for model in models: - refreshed_model_count += rebuild_index_for_model(model, engine_slug, verbosity, slim_=slim, batch_size_=batch_size) + refreshed_model_count += rebuild_index_for_model( + model, + engine_slug, + verbosity, + slim_=slim, + batch_size_=batch_size, + non_atomic_=non_atomic) else: # full rebuild (for one or all search engines) if engine_selected: @@ -163,7 +177,13 @@ class Command(BaseCommand): registered_models = search_engine.get_registered_models() # Rebuild the index for all registered models. for model in registered_models: - refreshed_model_count += rebuild_index_for_model(model, engine_slug, verbosity, slim_=slim, batch_size_=batch_size) + refreshed_model_count += rebuild_index_for_model( + model, + engine_slug, + verbosity, + slim_=slim, + batch_size_=batch_size, + non_atomic_=non_atomic) # Clean out any search entries that exist for stale content types. # Only do it during full rebuild From c3f00935ff217a61e6e67d8dd522d34016dc5912 Mon Sep 17 00:00:00 2001 From: Dustin Broderick Date: Wed, 6 Dec 2017 10:37:06 -0700 Subject: [PATCH 7/9] Adds logic for non-atomic indexing --- watson/management/commands/buildwatson.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py index f367473..79e1599 100644 --- a/watson/management/commands/buildwatson.py +++ b/watson/management/commands/buildwatson.py @@ -28,7 +28,7 @@ def get_engine(engine_slug_): raise CommandError("Search Engine \"%s\" is not registered!" % force_text(engine_slug_)) -def rebuild_index_for_model(model_, engine_slug_, verbosity_, slim_=False, batch_size_=100): +def rebuild_index_for_model(model_, engine_slug_, verbosity_, slim_=False, batch_size_=100, non_atomic_=False): """rebuilds index for a model""" search_engine_ = get_engine(engine_slug_) @@ -64,7 +64,11 @@ def rebuild_index_for_model(model_, engine_slug_, verbosity_, slim_=False, batch engine_slug=force_text(engine_slug_), ) ) - _bulk_save_search_entries(iter_search_entries(), batch_size=batch_size_) + if non_atomic_: + _bulk_save_search_entries(iter_search_entries(), batch_size=batch_size_) + else: + with transaction.atomic(): + _bulk_save_search_entries(iter_search_entries(), batch_size=batch_size_) return local_refreshed_model_count[0] From 1641b73d608d49a54feb8227242ea82bd4e6a3be Mon Sep 17 00:00:00 2001 From: Dustin Broderick Date: Wed, 6 Dec 2017 11:13:10 -0700 Subject: [PATCH 8/9] Renames non-atomic flag to non_atomic (to prevent flag being non) --- watson/management/commands/buildwatson.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py index 79e1599..f677377 100644 --- a/watson/management/commands/buildwatson.py +++ b/watson/management/commands/buildwatson.py @@ -93,7 +93,7 @@ class Command(BaseCommand): changes or the index will be incomplete." ) parser.add_argument( - '--non-atomic', + '--non_atomic', action='store_true', default=False, help="Commit index entries in batches. WARNING: if buildwatson failse, \ @@ -123,7 +123,7 @@ class Command(BaseCommand): # Do we do a partial index and how large should the batches be? slim = options.get("slim") batch_size = options.get("batch_size") - non_atomic = options.get("non-atomic") + non_atomic = options.get("non_atomic") # work-around for legacy optparser hack in BaseCommand. In Django=1.10 the # args are collected in options['apps'], but in earlier versions they are From 0a8fe02844b361162398780736f2d6a8e73bf6b7 Mon Sep 17 00:00:00 2001 From: Dustin Broderick Date: Wed, 6 Dec 2017 11:42:57 -0700 Subject: [PATCH 9/9] Fixes issue with Flake8 linting --- watson/management/commands/buildwatson.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py index f677377..e1c6da9 100644 --- a/watson/management/commands/buildwatson.py +++ b/watson/management/commands/buildwatson.py @@ -124,7 +124,7 @@ class Command(BaseCommand): slim = options.get("slim") batch_size = options.get("batch_size") non_atomic = options.get("non_atomic") - + # work-around for legacy optparser hack in BaseCommand. In Django=1.10 the # args are collected in options['apps'], but in earlier versions they are # kept in args.