Features: 1) Update SKU backfill migration to process in batches of 10,000; 2) Add bulk update for improved performance; 3) Refactor SKU generation logic to remove reliance on Product model;

Fixes: 1) Prevent repeated queries by reusing the last processed primary key; 2) Avoid redundant SKU existence checks through optimized logic;

Extra: 1) Set migration atomicity to False for better batch handling; 2) Minor code cleanup and restructuring;
This commit is contained in:
Egor Pavlovich Gorbunov 2025-09-02 09:52:21 +03:00
parent 4e2c4c7ccb
commit efb9aaeeef

View file

@@ -1,14 +1,12 @@
from django.db import migrations, transaction
def generate_unique_sku(make_candidate, taken):
    """Return a fresh SKU produced by ``make_candidate()``.

    ``taken`` is the set of every SKU already assigned; the caller
    preloads it from the database, which is why no per-candidate
    ``Product.objects.filter(...).exists()`` query is needed here.
    The chosen SKU is added to ``taken`` before returning, so the set
    stays authoritative across successive calls within one backfill run.

    NOTE(review): loops forever if ``make_candidate`` can never produce
    an unseen value — the candidate space must exceed the row count.
    """
    while True:
        candidate = make_candidate()
        if candidate not in taken:
            taken.add(candidate)
            return candidate
def backfill_sku(apps, schema_editor):
@@ -17,17 +15,24 @@ def backfill_sku(apps, schema_editor):
taken = set(Product.objects.exclude(sku__isnull=True).values_list("sku", flat=True))
qs = Product.objects.filter(sku__isnull=True)
BATCH = 500
BATCH = 10000
last_pk = 0
while True:
ids = list(
Product.objects.filter(sku__isnull=True, pk__gt=last_pk).order_by("pk").values_list("pk", flat=True)[:BATCH]
)
if not ids:
break
updates = []
for pk in ids:
updates.append(Product(pk=pk, sku=generate_unique_sku(make_candidate, taken)))
start = 0
count = qs.count()
while start < count:
with transaction.atomic():
for product in qs.order_by("pk")[start : start + BATCH]:
product.sku = generate_unique_sku(Product, make_candidate, taken)
product.save(update_fields=["sku"])
start += BATCH
Product.objects.bulk_update(updates, ["sku"], batch_size=BATCH)
last_pk = ids[-1]
def noop(apps, schema_editor):
@@ -35,6 +40,8 @@ def noop(apps, schema_editor):
class Migration(migrations.Migration):
atomic = False
dependencies = [
("core", "0037_product_sku"),
]