Features: 1) Enhance product filter to support Elasticsearch ranking with preserved order; 2) Add personal_orders_only field to ProductDocument; 3) Introduce "name_exact" analyzer for case-insensitive exact matching in Elasticsearch.

Fixes: 1) Fix Elasticsearch document mapping to include the missing "ci" subfield; 2) Make the slug field indexed by removing index=False; 3) Update Elasticsearch search weights and sizes for improved relevance and pagination. Extra: 1) Refactor product search logic to prioritize partnumber > sku > slug > name.ci; 2) Add new analyzer "name_exact" for case-insensitive exact matching; 3) Adjust stock model to allow blank system_attributes; 4) Update migration to reflect JSONField changes; 5) Adjust Elasticsearch boosting weights for better ranking; 6) Set category, brand, and product search sizes to 33 for consistency (categories and brands up from 22, products down from 44); 7) Add missing personal_orders_only field to ProductDocument.
2025-11-02 03:09:16 +03:00 · 2025-11-02 03:09:16 +03:00 · 38b22704b1
commit 38b22704b1
parent 0cec8b0380
5 changed files with 89 additions and 36 deletions
--- a/core/elasticsearch/init.py
+++ b/core/elasticsearch/init.py
@ -58,32 +58,36 @@ functions = [
    {
        "filter": Q("term", **{"_index": "products"}),
        "field_value_factor": {
-            "field": "rating",
+            "field": "category_priority",
            "modifier": "log1p",
-            "factor": 0.10,
+            "factor": 0.16,
            "missing": 0,
        },
-        "weight": 0.3,
+        "weight": 0.36,
    },
    {
        "filter": Q("term", **{"_index": "products"}),
        "field_value_factor": {
            "field": "rating",
            "modifier": "log1p",
            "factor": 0.08,
            "missing": 0,
        },
        "weight": 0.25,
    },
    {
        "filter": Q("term", **{"_index": "products"}),
        "field_value_factor": {
            "field": "total_orders",
            "modifier": "log1p",
            "factor": 0.18,
            "missing": 0,
        },
        "weight": 0.4,
    },
    {
        "filter": Q("term", **{"_index": "products"}),
        "field_value_factor": {
            "field": "category_priority",
            "modifier": "log1p",
            "factor": 0.15,
            "missing": 0,
        },
-        "weight": 0.35,
+        "weight": 0.3,
    },
    {
        "filter": Q("bool", must=[Q("term", **{"_index": "products"}), Q("term", **{"personal_orders_only": False})]),
        "weight": 0.7,
    },
    {
        "filter": Q("term", **{"_index": "categories"}),
@ -120,10 +124,10 @@ def process_query(
    query = query.strip()
    try:
        exact_shoulds = [
-            Q("term", **{"name.raw": {"value": query, "boost": 2.0}}),
+            Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 20.0}}),
-            Q("term", **{"slug": {"value": slugify(query), "boost": 1.5}}),
+            Q("term", **{"sku.raw": {"value": query.lower(), "boost": 16.0}}),
-            Q("term", **{"sku.raw": {"value": query.lower(), "boost": 6.0}}),
+            Q("term", **{"slug": {"value": slugify(query), "boost": 12.0}}),
-            Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 7.0}}),
+            Q("match", **{"name.ci": {"query": query, "boost": 8.0}}),
        ]
        lang = ""
@ -172,10 +176,9 @@ def process_query(
        if is_code_like:
            text_shoulds.extend(
                [
-                    Q("term", **{"sku.raw": {"value": query.lower(), "boost": 10.0}}),
+                    Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 14.0}}),
-                    Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 12.0}}),
+                    Q("term", **{"sku.raw": {"value": query.lower(), "boost": 12.0}}),
-                    Q("prefix", **{"sku.raw": {"value": query.lower(), "boost": 5.0}}),
+                    Q("prefix", **{"partnumber.raw": {"value": query.lower(), "boost": 4.0}}),
                    Q("prefix", **{"partnumber.raw": {"value": query.lower(), "boost": 6.0}}),
                ]
            )
@ -211,17 +214,17 @@ def process_query(
        resp_cats = None
        if "categories" in indexes:
-            search_cats = build_search(["categories"], size=22)
+            search_cats = build_search(["categories"], size=33)
            resp_cats = search_cats.execute()
        resp_brands = None
        if "brands" in indexes:
-            search_brands = build_search(["brands"], size=22)
+            search_brands = build_search(["brands"], size=33)
            resp_brands = search_brands.execute()
        resp_products = None
        if "products" in indexes:
-            search_products = build_search(["products"], size=44)
+            search_products = build_search(["products"], size=33)
            resp_products = search_products.execute()
        results: dict[str, list[dict[str, Any]]] = {"products": [], "categories": [], "brands": [], "posts": []}
@ -252,15 +255,27 @@ def process_query(
                Q("term", **{"name.raw": {"value": query}}),
                Q("term", **{"slug": {"value": slugify(query)}}),
            ],
            "products": [
                Q("term", **{"name.raw": {"value": query}}),
                Q("term", **{"slug": {"value": slugify(query)}}),
                Q("term", **{"sku.raw": {"value": query.lower()}}),
                Q("term", **{"partnumber.raw": {"value": query.lower()}}),
            ],
        }
-        for idx_name in ("categories", "brands", "products"):
+        # Collect exact product matches in strict priority: partnumber > sku > slug > name.ci
        if "products" in indexes:
            product_exact_sequence = [
                Q("term", **{"partnumber.raw": {"value": query.lower()}}),
                Q("term", **{"sku.raw": {"value": query.lower()}}),
                Q("term", **{"slug": {"value": slugify(query)}}),
                Q("match", **{"name.ci": {"query": query}}),
            ]
            for qx in product_exact_sequence:
                try:
                    resp_exact = (
                        Search(index=["products"]).query(qx).extra(size=5, track_total_hits=False).execute()
                    )
                except NotFoundError:
                    resp_exact = None
                if resp_exact is not None and getattr(resp_exact, "hits", None):
                    _collect_hits(list(resp_exact.hits))
        for idx_name in ("categories", "brands"):
            if idx_name in indexes:
                shoulds = exact_queries_by_index[idx_name]
                s_exact = (
@ -389,6 +404,7 @@ class ActiveOnlyMixin:
 COMMON_ANALYSIS = {
    "char_filter": {
        "icu_nfkc_cf": {"type": "icu_normalizer", "name": "nfkc_cf"},
        "strip_ws_punct": {"type": "pattern_replace", "pattern": "[\\s\\p{Punct}]+", "replacement": ""},
    },
    "filter": {
        "edge_ngram_filter": {"type": "edge_ngram", "min_gram": 1, "max_gram": 20},
@ -433,6 +449,12 @@ COMMON_ANALYSIS = {
            "tokenizer": "icu_tokenizer",
            "filter": ["lowercase", "icu_folding", "double_metaphone"],
        },
        "name_exact": {
            "type": "custom",
            "char_filter": ["icu_nfkc_cf", "strip_ws_punct"],
            "tokenizer": "keyword",
            "filter": ["lowercase", "icu_folding"],
        },
        "cjk_search": {
            "type": "custom",
            "char_filter": ["icu_nfkc_cf"],
--- a/core/elasticsearch/documents.py
+++ b/core/elasticsearch/documents.py
@ -19,6 +19,7 @@ class BaseDocument(Document):  # type: ignore [misc]
            "phonetic": fields.TextField(analyzer="name_phonetic"),
            "auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
            "translit": fields.TextField(analyzer="translit_index", search_analyzer="translit_query"),
            "ci": fields.TextField(analyzer="name_exact", search_analyzer="name_exact"),
        },
    )
    description = fields.TextField(
@ -32,7 +33,7 @@ class BaseDocument(Document):  # type: ignore [misc]
            "translit": fields.TextField(analyzer="translit_index", search_analyzer="translit_query"),
        },
    )
-    slug = fields.KeywordField(attr="slug", index=False)
+    slug = fields.KeywordField(attr="slug")
    class Index:
        settings = {
@ -52,6 +53,7 @@ class BaseDocument(Document):  # type: ignore [misc]
 class ProductDocument(ActiveOnlyMixin, BaseDocument):
    rating = fields.FloatField(attr="rating")
    total_orders = fields.IntegerField(attr="total_orders")
    personal_orders_only = fields.BooleanField(attr="personal_orders_only")
    brand_priority = fields.IntegerField(
        attr="brand.priority",
        index=True,
--- a/core/filters.py
+++ b/core/filters.py
@ -135,6 +135,7 @@ class ProductFilter(FilterSet):  # type: ignore [misc]
        prefix: str | None = None,
    ) -> None:
        super().__init__(data=data, queryset=queryset, request=request, prefix=prefix)
        self._es_rank_applied: bool = False
        ordering_param = self.data.get("order_by", "")
        if ordering_param:
            order_fields = [field.strip("-") for field in ordering_param.split(",")]
@ -164,9 +165,19 @@ class ProductFilter(FilterSet):  # type: ignore [misc]
        if not value:
            return queryset
-        uuids = [product.get("uuid") for product in process_query(query=value, indexes=("products",))["products"]]  # type: ignore
+        es_products = process_query(query=value, indexes=("products",))  # type: ignore
        uuids = [p.get("uuid") for p in (es_products or {}).get("products", [])][:33]
        if not uuids:
            return queryset.none()
-        return queryset.filter(uuid__in=uuids)
+        # Preserve ES order using a CASE expression
        when_statements = [When(uuid=u, then=pos) for pos, u in enumerate(uuids)]
        queryset = queryset.filter(uuid__in=uuids).annotate(
            es_rank=Case(*when_statements, default=Value(9999), output_field=IntegerField())
        )
        # Mark that ES ranking is applied, qs() will order appropriately
        self._es_rank_applied = True
        return queryset
    def filter_include_flag(self, queryset: QuerySet[Product], name: str, value: str) -> QuerySet[Product]:
        if not self.data.get("category_uuid"):
--- a/core/migrations/0052_alter_stock_system_attributes.py
+++ b/core/migrations/0052_alter_stock_system_attributes.py
@ -0,0 +1,18 @@
 # Generated by Django 5.2.7 on 2025-11-01 23:45
 from django.db import migrations, models
 class Migration(migrations.Migration):
    """Alter the ``system_attributes`` field of the ``stock`` model.

    The field is redefined as a ``JSONField`` with ``blank=True``,
    ``default=dict`` and the verbose name ``'system attributes'``.
    """
    # This migration is ordered after migration 0051 of the ``core`` app.
    dependencies = [
        ('core', '0051_stock_system_attributes'),
    ]
    # Single operation: replace the existing field definition in place.
    operations = [
        migrations.AlterField(
            model_name='stock',
            name='system_attributes',
            # blank=True is set alongside the dict default and verbose name.
            field=models.JSONField(blank=True, default=dict, verbose_name='system attributes'),
        ),
    ]
--- a/core/models.py
+++ b/core/models.py
@ -551,7 +551,7 @@ class Stock(ExportModelOperationsMixin("stock"), NiceModel):  # type: ignore [mi
        verbose_name=_("digital file"),
        upload_to="downloadables/",
    )
-    system_attributes = JSONField(default=dict, verbose_name=_("system attributes"))
+    system_attributes = JSONField(default=dict, verbose_name=_("system attributes"), blank=True)
    def __str__(self) -> str:
        return f"{self.vendor.name} - {self.product!s}"