Features: 1) Add `sku` and `partnumber` fields with search analyzers to the Elasticsearch schema; 2) Implement boosting of code-like queries on the `sku` and `partnumber` fields; 3) Separate the search logic into a modular `build_search` function.
Fixes: 1) Correct the faulty replacement logic for the `name.ngram` and `title.ngram` boosts in specific language cases; 2) Handle the `AUTO:5,8` fuzziness setting more cleanly. Extra: Refactor the search responses so that brands, categories, and products are handled separately, improving modularity.
This commit is contained in:
parent
d811d1e5fe
commit
880f3f19b1
2 changed files with 84 additions and 34 deletions
|
|
@ -1,3 +1,5 @@
|
||||||
|
import re
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.http import Http404
|
from django.http import Http404
|
||||||
from django.utils.text import slugify
|
from django.utils.text import slugify
|
||||||
|
|
@ -26,6 +28,12 @@ SMART_FIELDS = [
|
||||||
"category_name^3",
|
"category_name^3",
|
||||||
"category_name.ngram^2",
|
"category_name.ngram^2",
|
||||||
"category_name.auto^2",
|
"category_name.auto^2",
|
||||||
|
"sku^9",
|
||||||
|
"sku.ngram^6",
|
||||||
|
"sku.auto^8",
|
||||||
|
"partnumber^10",
|
||||||
|
"partnumber.ngram^7",
|
||||||
|
"partnumber.auto^9",
|
||||||
]
|
]
|
||||||
|
|
||||||
functions = [
|
functions = [
|
||||||
|
|
@ -101,6 +109,8 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
|
||||||
exact_shoulds = [
|
exact_shoulds = [
|
||||||
Q("term", **{"name.raw": {"value": query, "boost": 3.0}}),
|
Q("term", **{"name.raw": {"value": query, "boost": 3.0}}),
|
||||||
Q("term", **{"slug": {"value": slugify(query), "boost": 2.0}}),
|
Q("term", **{"slug": {"value": slugify(query), "boost": 2.0}}),
|
||||||
|
Q("term", **{"sku.raw": {"value": query.lower(), "boost": 8.0}}),
|
||||||
|
Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 9.0}}),
|
||||||
]
|
]
|
||||||
|
|
||||||
lang = ""
|
lang = ""
|
||||||
|
|
@ -112,19 +122,15 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
|
||||||
is_rtl_or_indic = base in {"ar", "hi"}
|
is_rtl_or_indic = base in {"ar", "hi"}
|
||||||
|
|
||||||
fields_all = SMART_FIELDS[:]
|
fields_all = SMART_FIELDS[:]
|
||||||
|
|
||||||
if is_cjk or is_rtl_or_indic:
|
if is_cjk or is_rtl_or_indic:
|
||||||
fields_all = [f for f in fields_all if ".phonetic" not in f]
|
fields_all = [f for f in fields_all if ".phonetic" not in f]
|
||||||
|
|
||||||
if is_cjk or is_rtl_or_indic:
|
|
||||||
fields_all = [
|
fields_all = [
|
||||||
f.replace("name.ngram^6", "name.ngram^8").replace("title.ngram^4", "title.ngram^6") for f in fields_all
|
f.replace("name.ngram^8", "name.ngram^10").replace("title.ngram^4", "title.ngram^6") for f in fields_all
|
||||||
]
|
]
|
||||||
|
|
||||||
if is_cjk or is_rtl_or_indic:
|
fuzzy = None if (is_cjk or is_rtl_or_indic) else "AUTO:5,8"
|
||||||
fuzzy = None
|
|
||||||
else:
|
is_code_like = bool(re.search(r"[0-9]", query)) and " " not in query
|
||||||
fuzzy = "AUTO:5,8"
|
|
||||||
|
|
||||||
text_shoulds = [
|
text_shoulds = [
|
||||||
Q(
|
Q(
|
||||||
|
|
@ -142,44 +148,62 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if is_code_like:
|
||||||
|
text_shoulds.extend(
|
||||||
|
[
|
||||||
|
Q("term", **{"sku.raw": {"value": query.lower(), "boost": 12.0}}),
|
||||||
|
Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 14.0}}),
|
||||||
|
Q("prefix", **{"sku.raw": {"value": query.lower(), "boost": 6.0}}),
|
||||||
|
Q("prefix", **{"partnumber.raw": {"value": query.lower(), "boost": 7.0}}),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
query_base = Q(
|
query_base = Q(
|
||||||
"bool",
|
"bool",
|
||||||
should=exact_shoulds + text_shoulds,
|
should=exact_shoulds + text_shoulds,
|
||||||
minimum_should_match=1,
|
minimum_should_match=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
search = (
|
def build_search(indexes, size):
|
||||||
Search(index=["products", "categories", "brands", "posts"])
|
return (
|
||||||
.query(query_base)
|
Search(index=indexes)
|
||||||
.extra(
|
.query(query_base)
|
||||||
rescore={
|
.extra(
|
||||||
"window_size": 200,
|
rescore={
|
||||||
"query": {
|
"window_size": 200,
|
||||||
"rescore_query": Q(
|
"query": {
|
||||||
"function_score",
|
"rescore_query": Q(
|
||||||
query=Q("match_all"),
|
"function_score",
|
||||||
functions=functions,
|
query=Q("match_all"),
|
||||||
boost_mode="sum",
|
functions=functions,
|
||||||
score_mode="sum",
|
boost_mode="sum",
|
||||||
max_boost=2.0,
|
score_mode="sum",
|
||||||
).to_dict(),
|
max_boost=2.0,
|
||||||
"query_weight": 1.0,
|
).to_dict(),
|
||||||
"rescore_query_weight": 1.0,
|
"query_weight": 1.0,
|
||||||
},
|
"rescore_query_weight": 1.0,
|
||||||
}
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
.extra(size=size, track_total_hits=True)
|
||||||
)
|
)
|
||||||
.extra(size=100)
|
|
||||||
)
|
search_cats = build_search(["categories"], size=22)
|
||||||
response = search.execute()
|
search_brands = build_search(["brands"], size=22)
|
||||||
|
search_products = build_search(["products"], size=44)
|
||||||
|
|
||||||
|
resp_cats = search_cats.execute()
|
||||||
|
resp_brands = search_brands.execute()
|
||||||
|
resp_products = search_products.execute()
|
||||||
|
|
||||||
results: dict = {"products": [], "categories": [], "brands": [], "posts": []}
|
results: dict = {"products": [], "categories": [], "brands": [], "posts": []}
|
||||||
uuids_by_index: dict[str, list] = {"products": [], "categories": [], "brands": []}
|
uuids_by_index: dict[str, list] = {"products": [], "categories": [], "brands": []}
|
||||||
hit_cache: list = []
|
hit_cache: list = []
|
||||||
|
|
||||||
for hit in response.hits:
|
for h in list(resp_cats.hits[:12]) + list(resp_brands.hits[:12]) + list(resp_products.hits[:26]):
|
||||||
hit_cache.append(hit)
|
hit_cache.append(h)
|
||||||
if getattr(hit, "uuid", None):
|
if getattr(h, "uuid", None):
|
||||||
uuids_by_index.setdefault(hit.meta.index, []).append(str(hit.uuid))
|
uuids_by_index.setdefault(h.meta.index, []).append(str(h.uuid))
|
||||||
|
|
||||||
products_by_uuid = {}
|
products_by_uuid = {}
|
||||||
brands_by_uuid = {}
|
brands_by_uuid = {}
|
||||||
|
|
@ -344,6 +368,12 @@ COMMON_ANALYSIS = {
|
||||||
"filter": ["lowercase", "icu_folding", "indic_norm"],
|
"filter": ["lowercase", "icu_folding", "indic_norm"],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
"normalizer": {
|
||||||
|
"lc_norm": {
|
||||||
|
"type": "custom",
|
||||||
|
"filter": ["lowercase", "icu_folding"],
|
||||||
|
}
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -79,6 +79,26 @@ class ProductDocument(ActiveOnlyMixin, BaseDocument):
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
sku = fields.KeywordField(
|
||||||
|
attr="sku",
|
||||||
|
normalizer="lc_norm",
|
||||||
|
fields={
|
||||||
|
"raw": fields.KeywordField(normalizer="lc_norm"),
|
||||||
|
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
|
||||||
|
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
partnumber = fields.KeywordField(
|
||||||
|
attr="partnumber",
|
||||||
|
normalizer="lc_norm",
|
||||||
|
fields={
|
||||||
|
"raw": fields.KeywordField(normalizer="lc_norm"),
|
||||||
|
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
|
||||||
|
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
def get_queryset(self):
|
def get_queryset(self):
|
||||||
return (
|
return (
|
||||||
super()
|
super()
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue