From ab65d162ee2176209265f424735ef556fd75e1dd Mon Sep 17 00:00:00 2001 From: Dustin Broderick Date: Tue, 28 Nov 2017 14:19:06 -0700 Subject: [PATCH 1/2] Adds batch_size option to buildwatson --- watson/management/commands/buildwatson.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py index ce3e4c6..67e609e 100644 --- a/watson/management/commands/buildwatson.py +++ b/watson/management/commands/buildwatson.py @@ -28,7 +28,7 @@ def get_engine(engine_slug_): raise CommandError("Search Engine \"%s\" is not registered!" % force_text(engine_slug_)) -def rebuild_index_for_model(model_, engine_slug_, verbosity_): +def rebuild_index_for_model(model_, engine_slug_, verbosity_, batch_size_=100): """rebuilds index for a model""" search_engine_ = get_engine(engine_slug_) @@ -74,6 +74,13 @@ class Command(BaseCommand): action="store", help='Search engine models are registered with' ) + parser.add_argument( + '--batch_size', + action='store', + default=100, + type=int, + help="The batchsize with which entries will be added to the index." + ) def handle(self, *args, **options): """Runs the management command.""" @@ -88,6 +95,8 @@ class Command(BaseCommand): engine_slug = "default" engine_selected = False + batch_size = options.get("batch_size") + # work-around for legacy optparser hack in BaseCommand. In Django=1.10 the # args are collected in options['apps'], but in earlier versions they are # kept in args. From 8fd2573747b416c89148b75458997ae4fba4c187 Mon Sep 17 00:00:00 2001 From: Dustin Broderick Date: Tue, 28 Nov 2017 14:22:55 -0700 Subject: [PATCH 2/2] Adds batch_size functionality to buildwatson --- watson/management/commands/buildwatson.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py index 67e609e..d14b9b2 100644 --- a/watson/management/commands/buildwatson.py +++ b/watson/management/commands/buildwatson.py @@ -58,7 +58,7 @@ def rebuild_index_for_model(model_, engine_slug_, verbosity_, batch_size_=100): engine_slug=force_text(engine_slug_), ) ) - _bulk_save_search_entries(iter_search_entries()) + _bulk_save_search_entries(iter_search_entries(), batch_size=batch_size_) return local_refreshed_model_count[0] @@ -131,7 +131,7 @@ class Command(BaseCommand): if verbosity >= 3: print("Using search engine \"%s\"" % engine_slug) for model in models: - refreshed_model_count += rebuild_index_for_model(model, engine_slug, verbosity) + refreshed_model_count += rebuild_index_for_model(model, engine_slug, verbosity, batch_size_=batch_size) else: # full rebuild (for one or all search engines) if engine_selected: @@ -147,7 +147,7 @@ class Command(BaseCommand): registered_models = search_engine.get_registered_models() # Rebuild the index for all registered models. for model in registered_models: - refreshed_model_count += rebuild_index_for_model(model, engine_slug, verbosity) + refreshed_model_count += rebuild_index_for_model(model, engine_slug, verbosity, batch_size_=batch_size) # Clean out any search entries that exist for stale content types. # Only do it during full rebuild