Features: 1) Add `sku` and `partnumber` fields with search analyzers to the Elasticsearch schema; 2) Implement code-like query boosting for the `sku` and `partnumber` fields; 3) Separate search logic into a modular `build_search` function;
Fixes: 1) Correct the incorrect replacement logic for `name.ngram` and `title.ngram` boosts in language-specific cases; 2) Simplify the `AUTO:5,8` fuzziness selection into a single conditional expression; Extra: Refactor search responses to query brands, categories, and products separately for improved modularity.
This commit is contained in:
parent
d811d1e5fe
commit
880f3f19b1
2 changed files with 84 additions and 34 deletions
|
|
@ -1,3 +1,5 @@
|
|||
import re
|
||||
|
||||
from django.conf import settings
|
||||
from django.http import Http404
|
||||
from django.utils.text import slugify
|
||||
|
|
@ -26,6 +28,12 @@ SMART_FIELDS = [
|
|||
"category_name^3",
|
||||
"category_name.ngram^2",
|
||||
"category_name.auto^2",
|
||||
"sku^9",
|
||||
"sku.ngram^6",
|
||||
"sku.auto^8",
|
||||
"partnumber^10",
|
||||
"partnumber.ngram^7",
|
||||
"partnumber.auto^9",
|
||||
]
|
||||
|
||||
functions = [
|
||||
|
|
@ -101,6 +109,8 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
|
|||
exact_shoulds = [
|
||||
Q("term", **{"name.raw": {"value": query, "boost": 3.0}}),
|
||||
Q("term", **{"slug": {"value": slugify(query), "boost": 2.0}}),
|
||||
Q("term", **{"sku.raw": {"value": query.lower(), "boost": 8.0}}),
|
||||
Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 9.0}}),
|
||||
]
|
||||
|
||||
lang = ""
|
||||
|
|
@ -112,19 +122,15 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
|
|||
is_rtl_or_indic = base in {"ar", "hi"}
|
||||
|
||||
fields_all = SMART_FIELDS[:]
|
||||
|
||||
if is_cjk or is_rtl_or_indic:
|
||||
fields_all = [f for f in fields_all if ".phonetic" not in f]
|
||||
|
||||
if is_cjk or is_rtl_or_indic:
|
||||
fields_all = [
|
||||
f.replace("name.ngram^6", "name.ngram^8").replace("title.ngram^4", "title.ngram^6") for f in fields_all
|
||||
f.replace("name.ngram^8", "name.ngram^10").replace("title.ngram^4", "title.ngram^6") for f in fields_all
|
||||
]
|
||||
|
||||
if is_cjk or is_rtl_or_indic:
|
||||
fuzzy = None
|
||||
else:
|
||||
fuzzy = "AUTO:5,8"
|
||||
fuzzy = None if (is_cjk or is_rtl_or_indic) else "AUTO:5,8"
|
||||
|
||||
is_code_like = bool(re.search(r"[0-9]", query)) and " " not in query
|
||||
|
||||
text_shoulds = [
|
||||
Q(
|
||||
|
|
@ -142,44 +148,62 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
|
|||
),
|
||||
]
|
||||
|
||||
if is_code_like:
|
||||
text_shoulds.extend(
|
||||
[
|
||||
Q("term", **{"sku.raw": {"value": query.lower(), "boost": 12.0}}),
|
||||
Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 14.0}}),
|
||||
Q("prefix", **{"sku.raw": {"value": query.lower(), "boost": 6.0}}),
|
||||
Q("prefix", **{"partnumber.raw": {"value": query.lower(), "boost": 7.0}}),
|
||||
]
|
||||
)
|
||||
|
||||
query_base = Q(
|
||||
"bool",
|
||||
should=exact_shoulds + text_shoulds,
|
||||
minimum_should_match=1,
|
||||
)
|
||||
|
||||
search = (
|
||||
Search(index=["products", "categories", "brands", "posts"])
|
||||
.query(query_base)
|
||||
.extra(
|
||||
rescore={
|
||||
"window_size": 200,
|
||||
"query": {
|
||||
"rescore_query": Q(
|
||||
"function_score",
|
||||
query=Q("match_all"),
|
||||
functions=functions,
|
||||
boost_mode="sum",
|
||||
score_mode="sum",
|
||||
max_boost=2.0,
|
||||
).to_dict(),
|
||||
"query_weight": 1.0,
|
||||
"rescore_query_weight": 1.0,
|
||||
},
|
||||
}
|
||||
def build_search(indexes, size):
|
||||
return (
|
||||
Search(index=indexes)
|
||||
.query(query_base)
|
||||
.extra(
|
||||
rescore={
|
||||
"window_size": 200,
|
||||
"query": {
|
||||
"rescore_query": Q(
|
||||
"function_score",
|
||||
query=Q("match_all"),
|
||||
functions=functions,
|
||||
boost_mode="sum",
|
||||
score_mode="sum",
|
||||
max_boost=2.0,
|
||||
).to_dict(),
|
||||
"query_weight": 1.0,
|
||||
"rescore_query_weight": 1.0,
|
||||
},
|
||||
}
|
||||
)
|
||||
.extra(size=size, track_total_hits=True)
|
||||
)
|
||||
.extra(size=100)
|
||||
)
|
||||
response = search.execute()
|
||||
|
||||
search_cats = build_search(["categories"], size=22)
|
||||
search_brands = build_search(["brands"], size=22)
|
||||
search_products = build_search(["products"], size=44)
|
||||
|
||||
resp_cats = search_cats.execute()
|
||||
resp_brands = search_brands.execute()
|
||||
resp_products = search_products.execute()
|
||||
|
||||
results: dict = {"products": [], "categories": [], "brands": [], "posts": []}
|
||||
uuids_by_index: dict[str, list] = {"products": [], "categories": [], "brands": []}
|
||||
hit_cache: list = []
|
||||
|
||||
for hit in response.hits:
|
||||
hit_cache.append(hit)
|
||||
if getattr(hit, "uuid", None):
|
||||
uuids_by_index.setdefault(hit.meta.index, []).append(str(hit.uuid))
|
||||
for h in list(resp_cats.hits[:12]) + list(resp_brands.hits[:12]) + list(resp_products.hits[:26]):
|
||||
hit_cache.append(h)
|
||||
if getattr(h, "uuid", None):
|
||||
uuids_by_index.setdefault(h.meta.index, []).append(str(h.uuid))
|
||||
|
||||
products_by_uuid = {}
|
||||
brands_by_uuid = {}
|
||||
|
|
@ -344,6 +368,12 @@ COMMON_ANALYSIS = {
|
|||
"filter": ["lowercase", "icu_folding", "indic_norm"],
|
||||
},
|
||||
},
|
||||
"normalizer": {
|
||||
"lc_norm": {
|
||||
"type": "custom",
|
||||
"filter": ["lowercase", "icu_folding"],
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -79,6 +79,26 @@ class ProductDocument(ActiveOnlyMixin, BaseDocument):
|
|||
},
|
||||
)
|
||||
|
||||
sku = fields.KeywordField(
|
||||
attr="sku",
|
||||
normalizer="lc_norm",
|
||||
fields={
|
||||
"raw": fields.KeywordField(normalizer="lc_norm"),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
|
||||
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
|
||||
},
|
||||
)
|
||||
|
||||
partnumber = fields.KeywordField(
|
||||
attr="partnumber",
|
||||
normalizer="lc_norm",
|
||||
fields={
|
||||
"raw": fields.KeywordField(normalizer="lc_norm"),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
|
||||
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
|
||||
},
|
||||
)
|
||||
|
||||
def get_queryset(self):
|
||||
return (
|
||||
super()
|
||||
|
|
|
|||
Loading…
Reference in a new issue