From efb9aaeeef8b4677d0b9891e4b69a71a11518f0e Mon Sep 17 00:00:00 2001
From: Egor fureunoir Gorbunov
Date: Tue, 2 Sep 2025 09:52:21 +0300
Subject: [PATCH] Features: 1) Update SKU backfill migration to process in
 batches of 10,000; 2) Add bulk update for improved performance; 3) Refactor
 SKU generation logic to remove reliance on Product model; Fixes: 1) Prevent
 repeated queries by reusing the last processed primary key; 2) Avoid
 redundant SKU existence checks through optimized logic; Extra: 1) Set
 migration atomicity to False for better batch handling; 2) Minor code
 cleanup and restructuring;

---
 core/migrations/0038_backfill_product_sku.py | 39 ++++++++++++--------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/core/migrations/0038_backfill_product_sku.py b/core/migrations/0038_backfill_product_sku.py
index 61b5831e..428056bb 100644
--- a/core/migrations/0038_backfill_product_sku.py
+++ b/core/migrations/0038_backfill_product_sku.py
@@ -1,14 +1,12 @@
 from django.db import migrations, transaction
 
 
-def generate_unique_sku(Product, make_candidate, taken):
+def generate_unique_sku(make_candidate, taken):
     while True:
-        candidate = make_candidate()
-        if candidate in taken:
-            continue
-        if not Product.objects.filter(sku=candidate).exists():
-            taken.add(candidate)
-            return candidate
+        c = make_candidate()
+        if c not in taken:
+            taken.add(c)
+            return c
 
 
 def backfill_sku(apps, schema_editor):
@@ -17,17 +15,24 @@ def backfill_sku(apps, schema_editor):
 
     taken = set(Product.objects.exclude(sku__isnull=True).values_list("sku", flat=True))
 
-    qs = Product.objects.filter(sku__isnull=True)
-    BATCH = 500
+    BATCH = 10000
+    last_pk = 0
+
+    while True:
+        ids = list(
+            Product.objects.filter(sku__isnull=True, pk__gt=last_pk).order_by("pk").values_list("pk", flat=True)[:BATCH]
+        )
+        if not ids:
+            break
+
+        updates = []
+        for pk in ids:
+            updates.append(Product(pk=pk, sku=generate_unique_sku(make_candidate, taken)))
 
-    start = 0
-    count = qs.count()
-    while start < count:
         with transaction.atomic():
-            for product in qs.order_by("pk")[start : start + BATCH]:
-                product.sku = generate_unique_sku(Product, make_candidate, taken)
-                product.save(update_fields=["sku"])
-            start += BATCH
+            Product.objects.bulk_update(updates, ["sku"], batch_size=BATCH)
+
+        last_pk = ids[-1]
 
 
 def noop(apps, schema_editor):
@@ -35,6 +40,8 @@ def noop(apps, schema_editor):
 
 
 class Migration(migrations.Migration):
+    atomic = False
+
     dependencies = [
         ("core", "0037_product_sku"),
     ]