Features: 1) Update SKU backfill migration to process in batches of 10,000; 2) Add bulk update for improved performance; 3) Refactor SKU generation logic to remove reliance on Product model;

Fixes: 1) Prevent repeated queries by reusing the last processed primary key; 2) Avoid redundant SKU existence checks through optimized logic;

Extra: 1) Set migration atomicity to False for better batch handling; 2) Minor code cleanup and restructuring;
This commit is contained in:
Egor Pavlovich Gorbunov 2025-09-02 09:52:21 +03:00
parent 4e2c4c7ccb
commit efb9aaeeef

View file

@@ -1,14 +1,12 @@
from django.db import migrations, transaction
def generate_unique_sku(make_candidate, taken):
    """Return a fresh SKU produced by ``make_candidate()``.

    ``taken`` is the set of every SKU already assigned; the caller
    preloads it from the database, which is why no per-candidate
    ``Product.objects.filter(...).exists()`` query is needed here.
    The chosen SKU is added to ``taken`` before returning, so the set
    stays authoritative across successive calls within one backfill run.

    NOTE(review): loops forever if ``make_candidate`` can never produce
    an unseen value — the candidate space must exceed the row count.
    """
    while True:
        candidate = make_candidate()
        if candidate not in taken:
            taken.add(candidate)
            return candidate
def backfill_sku(apps, schema_editor):
@@ -17,17 +15,24 @@ def backfill_sku(apps, schema_editor):
taken = set(Product.objects.exclude(sku__isnull=True).values_list("sku", flat=True))
qs = Product.objects.filter(sku__isnull=True)
BATCH = 500
BATCH = 10000
last_pk = 0
while True:
ids = list(
Product.objects.filter(sku__isnull=True, pk__gt=last_pk).order_by("pk").values_list("pk", flat=True)[:BATCH]
)
if not ids:
break
updates = []
for pk in ids:
updates.append(Product(pk=pk, sku=generate_unique_sku(make_candidate, taken)))
start = 0
count = qs.count()
while start < count:
with transaction.atomic():
for product in qs.order_by("pk")[start : start + BATCH]:
product.sku = generate_unique_sku(Product, make_candidate, taken)
product.save(update_fields=["sku"])
start += BATCH
Product.objects.bulk_update(updates, ["sku"], batch_size=BATCH)
last_pk = ids[-1]
def noop(apps, schema_editor):
@@ -35,6 +40,8 @@ def noop(apps, schema_editor):
class Migration(migrations.Migration):
atomic = False
dependencies = [
("core", "0037_product_sku"),
]