diff --git a/core/elasticsearch/__init__.py b/core/elasticsearch/__init__.py index ef5e85e8..daea16d6 100644 --- a/core/elasticsearch/__init__.py +++ b/core/elasticsearch/__init__.py @@ -1,3 +1,5 @@ +import re + from django.conf import settings from django.http import Http404 from django.utils.text import slugify @@ -26,6 +28,12 @@ SMART_FIELDS = [ "category_name^3", "category_name.ngram^2", "category_name.auto^2", + "sku^9", + "sku.ngram^6", + "sku.auto^8", + "partnumber^10", + "partnumber.ngram^7", + "partnumber.auto^9", ] functions = [ @@ -101,6 +109,8 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str, exact_shoulds = [ Q("term", **{"name.raw": {"value": query, "boost": 3.0}}), Q("term", **{"slug": {"value": slugify(query), "boost": 2.0}}), + Q("term", **{"sku.raw": {"value": query.lower(), "boost": 8.0}}), + Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 9.0}}), ] lang = "" @@ -112,19 +122,15 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str, is_rtl_or_indic = base in {"ar", "hi"} fields_all = SMART_FIELDS[:] - if is_cjk or is_rtl_or_indic: fields_all = [f for f in fields_all if ".phonetic" not in f] - - if is_cjk or is_rtl_or_indic: fields_all = [ - f.replace("name.ngram^6", "name.ngram^8").replace("title.ngram^4", "title.ngram^6") for f in fields_all + f.replace("name.ngram^8", "name.ngram^10").replace("title.ngram^4", "title.ngram^6") for f in fields_all ] - if is_cjk or is_rtl_or_indic: - fuzzy = None - else: - fuzzy = "AUTO:5,8" + fuzzy = None if (is_cjk or is_rtl_or_indic) else "AUTO:5,8" + + is_code_like = bool(re.search(r"[0-9]", query)) and " " not in query text_shoulds = [ Q( @@ -142,44 +148,62 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str, ), ] + if is_code_like: + text_shoulds.extend( + [ + Q("term", **{"sku.raw": {"value": query.lower(), "boost": 12.0}}), + Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 14.0}}), + Q("prefix", **{"sku.raw": {"value": query.lower(), "boost": 6.0}}), + Q("prefix", **{"partnumber.raw": {"value": query.lower(), "boost": 7.0}}), + ] + ) + query_base = Q( "bool", should=exact_shoulds + text_shoulds, minimum_should_match=1, ) - search = ( - Search(index=["products", "categories", "brands", "posts"]) - .query(query_base) - .extra( - rescore={ - "window_size": 200, - "query": { - "rescore_query": Q( - "function_score", - query=Q("match_all"), - functions=functions, - boost_mode="sum", - score_mode="sum", - max_boost=2.0, - ).to_dict(), - "query_weight": 1.0, - "rescore_query_weight": 1.0, - }, - } + def build_search(indexes, size): + return ( + Search(index=indexes) + .query(query_base) + .extra( + rescore={ + "window_size": 200, + "query": { + "rescore_query": Q( + "function_score", + query=Q("match_all"), + functions=functions, + boost_mode="sum", + score_mode="sum", + max_boost=2.0, + ).to_dict(), + "query_weight": 1.0, + "rescore_query_weight": 1.0, + }, + } + ) + .extra(size=size, track_total_hits=True) ) - .extra(size=100) - ) - response = search.execute() + + search_cats = build_search(["categories"], size=22) + search_brands = build_search(["brands"], size=22) + search_products = build_search(["products"], size=44) + + resp_cats = search_cats.execute() + resp_brands = search_brands.execute() + resp_products = search_products.execute() results: dict = {"products": [], "categories": [], "brands": [], "posts": []} uuids_by_index: dict[str, list] = {"products": [], "categories": [], "brands": []} hit_cache: list = [] - for hit in response.hits: - hit_cache.append(hit) - if getattr(hit, "uuid", None): - uuids_by_index.setdefault(hit.meta.index, []).append(str(hit.uuid)) + for h in list(resp_cats.hits[:12]) + list(resp_brands.hits[:12]) + list(resp_products.hits[:26]): + hit_cache.append(h) + if getattr(h, "uuid", None): + uuids_by_index.setdefault(h.meta.index, []).append(str(h.uuid)) products_by_uuid = {} brands_by_uuid = {} @@ -344,6 +368,12 @@ COMMON_ANALYSIS = { "filter": ["lowercase", "icu_folding", "indic_norm"], }, }, + "normalizer": { + "lc_norm": { + "type": "custom", + "filter": ["lowercase", "icu_folding"], + } + }, } diff --git a/core/elasticsearch/documents.py b/core/elasticsearch/documents.py index b2bdca46..008608ba 100644 --- a/core/elasticsearch/documents.py +++ b/core/elasticsearch/documents.py @@ -79,6 +79,26 @@ class ProductDocument(ActiveOnlyMixin, BaseDocument): }, ) + sku = fields.KeywordField( + attr="sku", + normalizer="lc_norm", + fields={ + "raw": fields.KeywordField(normalizer="lc_norm"), + "ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"), + "auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"), + }, + ) + + partnumber = fields.KeywordField( + attr="partnumber", + normalizer="lc_norm", + fields={ + "raw": fields.KeywordField(normalizer="lc_norm"), + "ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"), + "auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"), + }, + ) + def get_queryset(self): return ( super()