Features: 1) Add `sku` and `partnumber` fields, with index and search analyzers, to the Elasticsearch product document; 2) Boost code-like queries (containing a digit and no spaces) against the `sku` and `partnumber` fields; 3) Extract search construction into a modular `build_search` function.

Fixes: 1) Correct the boost-replacement logic for `name.ngram` and `title.ngram` that is applied in specific language cases (CJK and RTL/Indic); 2) Simplify how the `AUTO:5,8` fuzziness setting is chosen by collapsing it into a single conditional expression.

Extra: Refactor search execution so that brands, categories, and products are queried separately and their responses handled separately, improving modularity.
This commit is contained in:
Egor Pavlovich Gorbunov 2025-09-05 19:42:51 +03:00
parent d811d1e5fe
commit 880f3f19b1
2 changed files with 84 additions and 34 deletions

View file

@ -1,3 +1,5 @@
import re
from django.conf import settings from django.conf import settings
from django.http import Http404 from django.http import Http404
from django.utils.text import slugify from django.utils.text import slugify
@ -26,6 +28,12 @@ SMART_FIELDS = [
"category_name^3", "category_name^3",
"category_name.ngram^2", "category_name.ngram^2",
"category_name.auto^2", "category_name.auto^2",
"sku^9",
"sku.ngram^6",
"sku.auto^8",
"partnumber^10",
"partnumber.ngram^7",
"partnumber.auto^9",
] ]
functions = [ functions = [
@ -101,6 +109,8 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
exact_shoulds = [ exact_shoulds = [
Q("term", **{"name.raw": {"value": query, "boost": 3.0}}), Q("term", **{"name.raw": {"value": query, "boost": 3.0}}),
Q("term", **{"slug": {"value": slugify(query), "boost": 2.0}}), Q("term", **{"slug": {"value": slugify(query), "boost": 2.0}}),
Q("term", **{"sku.raw": {"value": query.lower(), "boost": 8.0}}),
Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 9.0}}),
] ]
lang = "" lang = ""
@ -112,19 +122,15 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
is_rtl_or_indic = base in {"ar", "hi"} is_rtl_or_indic = base in {"ar", "hi"}
fields_all = SMART_FIELDS[:] fields_all = SMART_FIELDS[:]
if is_cjk or is_rtl_or_indic: if is_cjk or is_rtl_or_indic:
fields_all = [f for f in fields_all if ".phonetic" not in f] fields_all = [f for f in fields_all if ".phonetic" not in f]
if is_cjk or is_rtl_or_indic:
fields_all = [ fields_all = [
f.replace("name.ngram^6", "name.ngram^8").replace("title.ngram^4", "title.ngram^6") for f in fields_all f.replace("name.ngram^8", "name.ngram^10").replace("title.ngram^4", "title.ngram^6") for f in fields_all
] ]
if is_cjk or is_rtl_or_indic: fuzzy = None if (is_cjk or is_rtl_or_indic) else "AUTO:5,8"
fuzzy = None
else: is_code_like = bool(re.search(r"[0-9]", query)) and " " not in query
fuzzy = "AUTO:5,8"
text_shoulds = [ text_shoulds = [
Q( Q(
@ -142,44 +148,62 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
), ),
] ]
if is_code_like:
text_shoulds.extend(
[
Q("term", **{"sku.raw": {"value": query.lower(), "boost": 12.0}}),
Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 14.0}}),
Q("prefix", **{"sku.raw": {"value": query.lower(), "boost": 6.0}}),
Q("prefix", **{"partnumber.raw": {"value": query.lower(), "boost": 7.0}}),
]
)
query_base = Q( query_base = Q(
"bool", "bool",
should=exact_shoulds + text_shoulds, should=exact_shoulds + text_shoulds,
minimum_should_match=1, minimum_should_match=1,
) )
search = ( def build_search(indexes, size):
Search(index=["products", "categories", "brands", "posts"]) return (
.query(query_base) Search(index=indexes)
.extra( .query(query_base)
rescore={ .extra(
"window_size": 200, rescore={
"query": { "window_size": 200,
"rescore_query": Q( "query": {
"function_score", "rescore_query": Q(
query=Q("match_all"), "function_score",
functions=functions, query=Q("match_all"),
boost_mode="sum", functions=functions,
score_mode="sum", boost_mode="sum",
max_boost=2.0, score_mode="sum",
).to_dict(), max_boost=2.0,
"query_weight": 1.0, ).to_dict(),
"rescore_query_weight": 1.0, "query_weight": 1.0,
}, "rescore_query_weight": 1.0,
} },
}
)
.extra(size=size, track_total_hits=True)
) )
.extra(size=100)
) search_cats = build_search(["categories"], size=22)
response = search.execute() search_brands = build_search(["brands"], size=22)
search_products = build_search(["products"], size=44)
resp_cats = search_cats.execute()
resp_brands = search_brands.execute()
resp_products = search_products.execute()
results: dict = {"products": [], "categories": [], "brands": [], "posts": []} results: dict = {"products": [], "categories": [], "brands": [], "posts": []}
uuids_by_index: dict[str, list] = {"products": [], "categories": [], "brands": []} uuids_by_index: dict[str, list] = {"products": [], "categories": [], "brands": []}
hit_cache: list = [] hit_cache: list = []
for hit in response.hits: for h in list(resp_cats.hits[:12]) + list(resp_brands.hits[:12]) + list(resp_products.hits[:26]):
hit_cache.append(hit) hit_cache.append(h)
if getattr(hit, "uuid", None): if getattr(h, "uuid", None):
uuids_by_index.setdefault(hit.meta.index, []).append(str(hit.uuid)) uuids_by_index.setdefault(h.meta.index, []).append(str(h.uuid))
products_by_uuid = {} products_by_uuid = {}
brands_by_uuid = {} brands_by_uuid = {}
@ -344,6 +368,12 @@ COMMON_ANALYSIS = {
"filter": ["lowercase", "icu_folding", "indic_norm"], "filter": ["lowercase", "icu_folding", "indic_norm"],
}, },
}, },
"normalizer": {
"lc_norm": {
"type": "custom",
"filter": ["lowercase", "icu_folding"],
}
},
} }

View file

@ -79,6 +79,26 @@ class ProductDocument(ActiveOnlyMixin, BaseDocument):
}, },
) )
sku = fields.KeywordField(
attr="sku",
normalizer="lc_norm",
fields={
"raw": fields.KeywordField(normalizer="lc_norm"),
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
},
)
partnumber = fields.KeywordField(
attr="partnumber",
normalizer="lc_norm",
fields={
"raw": fields.KeywordField(normalizer="lc_norm"),
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
},
)
def get_queryset(self): def get_queryset(self):
return ( return (
super() super()