Features: 1) Add `sku` and `partnumber` fields, with index and search analyzers, to the Elasticsearch product document; 2) Boost `sku` and `partnumber` matches for code-like queries (queries that contain a digit and no spaces); 3) Extract search construction into a reusable `build_search` helper.

Fixes: 1) Correct the boost replacement for `name.ngram` and `title.ngram` that is applied in specific language cases (CJK and RTL/Indic); 2) Simplify the `AUTO:5,8` fuzziness selection into a single conditional expression. Extra: Refactor search to query and handle brands, categories, and products as separate responses, improving modularity.
2025-09-05 19:42:51 +03:00 · 2025-09-05 19:42:51 +03:00 · 880f3f19b1
commit 880f3f19b1
parent d811d1e5fe
2 changed files with 84 additions and 34 deletions
--- a/core/elasticsearch/init.py
+++ b/core/elasticsearch/init.py
@ -1,3 +1,5 @@
+import re
+
 from django.conf import settings
 from django.http import Http404
 from django.utils.text import slugify
@ -26,6 +28,12 @@ SMART_FIELDS = [
    "category_name^3",
    "category_name.ngram^2",
    "category_name.auto^2",
+    "sku^9",
+    "sku.ngram^6",
+    "sku.auto^8",
+    "partnumber^10",
+    "partnumber.ngram^7",
+    "partnumber.auto^9",
 ]

 functions = [
@ -101,6 +109,8 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
        exact_shoulds = [
            Q("term", **{"name.raw": {"value": query, "boost": 3.0}}),
            Q("term", **{"slug": {"value": slugify(query), "boost": 2.0}}),
+            Q("term", **{"sku.raw": {"value": query.lower(), "boost": 8.0}}),
+            Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 9.0}}),
        ]

        lang = ""
@ -112,19 +122,15 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
        is_rtl_or_indic = base in {"ar", "hi"}

        fields_all = SMART_FIELDS[:]
-
        if is_cjk or is_rtl_or_indic:
            fields_all = [f for f in fields_all if ".phonetic" not in f]
-
-        if is_cjk or is_rtl_or_indic:
            fields_all = [
-                f.replace("name.ngram^6", "name.ngram^8").replace("title.ngram^4", "title.ngram^6") for f in fields_all
+                f.replace("name.ngram^8", "name.ngram^10").replace("title.ngram^4", "title.ngram^6") for f in fields_all
            ]

-        if is_cjk or is_rtl_or_indic:
-            fuzzy = None
-        else:
-            fuzzy = "AUTO:5,8"
+        fuzzy = None if (is_cjk or is_rtl_or_indic) else "AUTO:5,8"
+
+        is_code_like = bool(re.search(r"[0-9]", query)) and " " not in query

        text_shoulds = [
            Q(
@ -142,44 +148,62 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
            ),
        ]

+        if is_code_like:
+            text_shoulds.extend(
+                [
+                    Q("term", **{"sku.raw": {"value": query.lower(), "boost": 12.0}}),
+                    Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 14.0}}),
+                    Q("prefix", **{"sku.raw": {"value": query.lower(), "boost": 6.0}}),
+                    Q("prefix", **{"partnumber.raw": {"value": query.lower(), "boost": 7.0}}),
+                ]
+            )
+
        query_base = Q(
            "bool",
            should=exact_shoulds + text_shoulds,
            minimum_should_match=1,
        )

-        search = (
-            Search(index=["products", "categories", "brands", "posts"])
-            .query(query_base)
-            .extra(
-                rescore={
-                    "window_size": 200,
-                    "query": {
-                        "rescore_query": Q(
-                            "function_score",
-                            query=Q("match_all"),
-                            functions=functions,
-                            boost_mode="sum",
-                            score_mode="sum",
-                            max_boost=2.0,
-                        ).to_dict(),
-                        "query_weight": 1.0,
-                        "rescore_query_weight": 1.0,
-                    },
-                }
+        def build_search(indexes, size):
+            return (
+                Search(index=indexes)
+                .query(query_base)
+                .extra(
+                    rescore={
+                        "window_size": 200,
+                        "query": {
+                            "rescore_query": Q(
+                                "function_score",
+                                query=Q("match_all"),
+                                functions=functions,
+                                boost_mode="sum",
+                                score_mode="sum",
+                                max_boost=2.0,
+                            ).to_dict(),
+                            "query_weight": 1.0,
+                            "rescore_query_weight": 1.0,
+                        },
+                    }
+                )
+                .extra(size=size, track_total_hits=True)
            )
-            .extra(size=100)
-        )
-        response = search.execute()
+
+        search_cats = build_search(["categories"], size=22)
+        search_brands = build_search(["brands"], size=22)
+        search_products = build_search(["products"], size=44)
+
+        resp_cats = search_cats.execute()
+        resp_brands = search_brands.execute()
+        resp_products = search_products.execute()

        results: dict = {"products": [], "categories": [], "brands": [], "posts": []}
        uuids_by_index: dict[str, list] = {"products": [], "categories": [], "brands": []}
        hit_cache: list = []

-        for hit in response.hits:
-            hit_cache.append(hit)
-            if getattr(hit, "uuid", None):
-                uuids_by_index.setdefault(hit.meta.index, []).append(str(hit.uuid))
+        for h in list(resp_cats.hits[:12]) + list(resp_brands.hits[:12]) + list(resp_products.hits[:26]):
+            hit_cache.append(h)
+            if getattr(h, "uuid", None):
+                uuids_by_index.setdefault(h.meta.index, []).append(str(h.uuid))

        products_by_uuid = {}
        brands_by_uuid = {}
@ -344,6 +368,12 @@ COMMON_ANALYSIS = {
            "filter": ["lowercase", "icu_folding", "indic_norm"],
        },
    },
+    "normalizer": {
+        "lc_norm": {
+            "type": "custom",
+            "filter": ["lowercase", "icu_folding"],
+        }
+    },
 }


--- a/core/elasticsearch/documents.py
+++ b/core/elasticsearch/documents.py
@ -79,6 +79,26 @@ class ProductDocument(ActiveOnlyMixin, BaseDocument):
        },
    )

+    sku = fields.KeywordField(
+        attr="sku",
+        normalizer="lc_norm",
+        fields={
+            "raw": fields.KeywordField(normalizer="lc_norm"),
+            "ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
+            "auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
+        },
+    )
+
+    partnumber = fields.KeywordField(
+        attr="partnumber",
+        normalizer="lc_norm",
+        fields={
+            "raw": fields.KeywordField(normalizer="lc_norm"),
+            "ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
+            "auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
+        },
+    )
+
    def get_queryset(self):
        return (
            super()