From 38b22704b1f4fdd33881fe0e8891e9a26e437e3d Mon Sep 17 00:00:00 2001 From: Egor fureunoir Gorbunov Date: Sun, 2 Nov 2025 03:09:16 +0300 Subject: [PATCH] Features: 1) Enhance product filter to support Elasticsearch ranking with preserved order; 2) Add personal_orders_only field to ProductDocument; 3) Introduce "name_exact" analyzer for case-insensitive exact matching in Elasticsearch; Fixes: 1) Fix Elasticsearch document mapping to include missing "ci" field; 2) Make the slug field indexed by removing index=False; 3) Update Elasticsearch search weights and sizes for improved relevance and pagination; Extra: 1) Refactor product search logic to prioritize partnumber > sku > slug > name.ci; 2) Add new analyzer "name_exact" for case-insensitive exact matching; 3) Adjust stock model to allow blank system_attributes; 4) Add migration 0052 to reflect the system_attributes JSONField change; 5) Adjust Elasticsearch boosting weights for better ranking; 6) Set category and brand search sizes to 33 (from 22) and product search size to 33 (from 44) for consistency; 7) Add missing personal_orders_only field to ProductDocument. 
--- core/elasticsearch/__init__.py | 86 ++++++++++++------- core/elasticsearch/documents.py | 4 +- core/filters.py | 15 +++- .../0052_alter_stock_system_attributes.py | 18 ++++ core/models.py | 2 +- 5 files changed, 89 insertions(+), 36 deletions(-) create mode 100644 core/migrations/0052_alter_stock_system_attributes.py diff --git a/core/elasticsearch/__init__.py b/core/elasticsearch/__init__.py index 00a7bcdd..2549781a 100644 --- a/core/elasticsearch/__init__.py +++ b/core/elasticsearch/__init__.py @@ -58,32 +58,36 @@ functions = [ { "filter": Q("term", **{"_index": "products"}), "field_value_factor": { - "field": "rating", + "field": "category_priority", "modifier": "log1p", - "factor": 0.10, + "factor": 0.16, "missing": 0, }, - "weight": 0.3, + "weight": 0.36, + }, + { + "filter": Q("term", **{"_index": "products"}), + "field_value_factor": { + "field": "rating", + "modifier": "log1p", + "factor": 0.08, + "missing": 0, + }, + "weight": 0.25, }, { "filter": Q("term", **{"_index": "products"}), "field_value_factor": { "field": "total_orders", "modifier": "log1p", - "factor": 0.18, - "missing": 0, - }, - "weight": 0.4, - }, - { - "filter": Q("term", **{"_index": "products"}), - "field_value_factor": { - "field": "category_priority", - "modifier": "log1p", "factor": 0.15, "missing": 0, }, - "weight": 0.35, + "weight": 0.3, + }, + { + "filter": Q("bool", must=[Q("term", **{"_index": "products"}), Q("term", **{"personal_orders_only": False})]), + "weight": 0.7, }, { "filter": Q("term", **{"_index": "categories"}), @@ -120,10 +124,10 @@ def process_query( query = query.strip() try: exact_shoulds = [ - Q("term", **{"name.raw": {"value": query, "boost": 2.0}}), - Q("term", **{"slug": {"value": slugify(query), "boost": 1.5}}), - Q("term", **{"sku.raw": {"value": query.lower(), "boost": 6.0}}), - Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 7.0}}), + Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 20.0}}), + Q("term", **{"sku.raw": 
{"value": query.lower(), "boost": 16.0}}), + Q("term", **{"slug": {"value": slugify(query), "boost": 12.0}}), + Q("match", **{"name.ci": {"query": query, "boost": 8.0}}), ] lang = "" @@ -172,10 +176,9 @@ def process_query( if is_code_like: text_shoulds.extend( [ - Q("term", **{"sku.raw": {"value": query.lower(), "boost": 10.0}}), - Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 12.0}}), - Q("prefix", **{"sku.raw": {"value": query.lower(), "boost": 5.0}}), - Q("prefix", **{"partnumber.raw": {"value": query.lower(), "boost": 6.0}}), + Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 14.0}}), + Q("term", **{"sku.raw": {"value": query.lower(), "boost": 12.0}}), + Q("prefix", **{"partnumber.raw": {"value": query.lower(), "boost": 4.0}}), ] ) @@ -211,17 +214,17 @@ def process_query( resp_cats = None if "categories" in indexes: - search_cats = build_search(["categories"], size=22) + search_cats = build_search(["categories"], size=33) resp_cats = search_cats.execute() resp_brands = None if "brands" in indexes: - search_brands = build_search(["brands"], size=22) + search_brands = build_search(["brands"], size=33) resp_brands = search_brands.execute() resp_products = None if "products" in indexes: - search_products = build_search(["products"], size=44) + search_products = build_search(["products"], size=33) resp_products = search_products.execute() results: dict[str, list[dict[str, Any]]] = {"products": [], "categories": [], "brands": [], "posts": []} @@ -252,15 +255,27 @@ def process_query( Q("term", **{"name.raw": {"value": query}}), Q("term", **{"slug": {"value": slugify(query)}}), ], - "products": [ - Q("term", **{"name.raw": {"value": query}}), - Q("term", **{"slug": {"value": slugify(query)}}), - Q("term", **{"sku.raw": {"value": query.lower()}}), - Q("term", **{"partnumber.raw": {"value": query.lower()}}), - ], } - for idx_name in ("categories", "brands", "products"): + # Collect exact product matches in strict priority: partnumber > 
sku > slug > name.ci + if "products" in indexes: + product_exact_sequence = [ + Q("term", **{"partnumber.raw": {"value": query.lower()}}), + Q("term", **{"sku.raw": {"value": query.lower()}}), + Q("term", **{"slug": {"value": slugify(query)}}), + Q("match", **{"name.ci": {"query": query}}), + ] + for qx in product_exact_sequence: + try: + resp_exact = ( + Search(index=["products"]).query(qx).extra(size=5, track_total_hits=False).execute() + ) + except NotFoundError: + resp_exact = None + if resp_exact is not None and getattr(resp_exact, "hits", None): + _collect_hits(list(resp_exact.hits)) + + for idx_name in ("categories", "brands"): if idx_name in indexes: shoulds = exact_queries_by_index[idx_name] s_exact = ( @@ -389,6 +404,7 @@ class ActiveOnlyMixin: COMMON_ANALYSIS = { "char_filter": { "icu_nfkc_cf": {"type": "icu_normalizer", "name": "nfkc_cf"}, + "strip_ws_punct": {"type": "pattern_replace", "pattern": "[\\s\\p{Punct}]+", "replacement": ""}, }, "filter": { "edge_ngram_filter": {"type": "edge_ngram", "min_gram": 1, "max_gram": 20}, @@ -433,6 +449,12 @@ COMMON_ANALYSIS = { "tokenizer": "icu_tokenizer", "filter": ["lowercase", "icu_folding", "double_metaphone"], }, + "name_exact": { + "type": "custom", + "char_filter": ["icu_nfkc_cf", "strip_ws_punct"], + "tokenizer": "keyword", + "filter": ["lowercase", "icu_folding"], + }, "cjk_search": { "type": "custom", "char_filter": ["icu_nfkc_cf"], diff --git a/core/elasticsearch/documents.py b/core/elasticsearch/documents.py index a7904383..a958b76e 100644 --- a/core/elasticsearch/documents.py +++ b/core/elasticsearch/documents.py @@ -19,6 +19,7 @@ class BaseDocument(Document): # type: ignore [misc] "phonetic": fields.TextField(analyzer="name_phonetic"), "auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"), "translit": fields.TextField(analyzer="translit_index", search_analyzer="translit_query"), + "ci": fields.TextField(analyzer="name_exact", search_analyzer="name_exact"), }, ) 
description = fields.TextField( @@ -32,7 +33,7 @@ class BaseDocument(Document): # type: ignore [misc] "translit": fields.TextField(analyzer="translit_index", search_analyzer="translit_query"), }, ) - slug = fields.KeywordField(attr="slug", index=False) + slug = fields.KeywordField(attr="slug") class Index: settings = { @@ -52,6 +53,7 @@ class BaseDocument(Document): # type: ignore [misc] class ProductDocument(ActiveOnlyMixin, BaseDocument): rating = fields.FloatField(attr="rating") total_orders = fields.IntegerField(attr="total_orders") + personal_orders_only = fields.BooleanField(attr="personal_orders_only") brand_priority = fields.IntegerField( attr="brand.priority", index=True, diff --git a/core/filters.py b/core/filters.py index 30cb3032..64429c99 100644 --- a/core/filters.py +++ b/core/filters.py @@ -135,6 +135,7 @@ class ProductFilter(FilterSet): # type: ignore [misc] prefix: str | None = None, ) -> None: super().__init__(data=data, queryset=queryset, request=request, prefix=prefix) + self._es_rank_applied: bool = False ordering_param = self.data.get("order_by", "") if ordering_param: order_fields = [field.strip("-") for field in ordering_param.split(",")] @@ -164,9 +165,19 @@ class ProductFilter(FilterSet): # type: ignore [misc] if not value: return queryset - uuids = [product.get("uuid") for product in process_query(query=value, indexes=("products",))["products"]] # type: ignore + es_products = process_query(query=value, indexes=("products",)) # type: ignore + uuids = [p.get("uuid") for p in (es_products or {}).get("products", [])][:33] + if not uuids: + return queryset.none() - return queryset.filter(uuid__in=uuids) + # Preserve ES order using a CASE expression + when_statements = [When(uuid=u, then=pos) for pos, u in enumerate(uuids)] + queryset = queryset.filter(uuid__in=uuids).annotate( + es_rank=Case(*when_statements, default=Value(9999), output_field=IntegerField()) + ) + # Mark that ES ranking is applied, qs() will order appropriately + 
self._es_rank_applied = True + return queryset def filter_include_flag(self, queryset: QuerySet[Product], name: str, value: str) -> QuerySet[Product]: if not self.data.get("category_uuid"): diff --git a/core/migrations/0052_alter_stock_system_attributes.py b/core/migrations/0052_alter_stock_system_attributes.py new file mode 100644 index 00000000..4b571fbe --- /dev/null +++ b/core/migrations/0052_alter_stock_system_attributes.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.7 on 2025-11-01 23:45 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0051_stock_system_attributes'), + ] + + operations = [ + migrations.AlterField( + model_name='stock', + name='system_attributes', + field=models.JSONField(blank=True, default=dict, verbose_name='system attributes'), + ), + ] diff --git a/core/models.py b/core/models.py index 286d0843..83d04cec 100644 --- a/core/models.py +++ b/core/models.py @@ -551,7 +551,7 @@ class Stock(ExportModelOperationsMixin("stock"), NiceModel): # type: ignore [mi verbose_name=_("digital file"), upload_to="downloadables/", ) - system_attributes = JSONField(default=dict, verbose_name=_("system attributes")) + system_attributes = JSONField(default=dict, verbose_name=_("system attributes"), blank=True) def __str__(self) -> str: return f"{self.vendor.name} - {self.product!s}"