Features: 1) Add a `seen_keys` mechanism to avoid duplicate hits in Elasticsearch query results; 2) Introduce a `_collect_hits` helper function for processing and storing hits; 3) Add exact-match queries for categories, brands, and products to improve search accuracy.
Fixes: 1) Prevent duplicate entries in hit processing by checking `seen_keys`. Extra: Refactor the query-building logic for consistency and readability; apply minor performance optimizations to query execution.
This commit is contained in:
parent
e894affad7
commit
c263182414
1 changed file with 51 additions and 0 deletions
|
|
@@ -228,12 +228,63 @@ def process_query(
|
|||
uuids_by_index: dict[str, list[dict[str, Any]]] = {"products": [], "categories": [], "brands": []}
|
||||
hit_cache: list[Any] = []
|
||||
|
||||
seen_keys: set[tuple[str, str]] = set()
|
||||
|
||||
def _hit_key(hittee: Any) -> tuple[str, str]:
    """Build the de-duplication key for a single search hit.

    The key is ``(index, identifier)``: the index name comes from
    ``hittee.meta.index`` and the identifier is the hit's ``uuid`` attribute
    when it is present and truthy, otherwise ``hittee.meta.id``. The
    identifier is always converted to ``str``.
    """
    index_name = hittee.meta.index
    identifier = getattr(hittee, "uuid", None)
    if not identifier:
        # No usable uuid on the hit: fall back to the document id.
        identifier = hittee.meta.id
    return index_name, str(identifier)
|
||||
|
||||
def _collect_hits(hits: list[Any]) -> None:
    """Record every hit in *hits* whose key has not been seen yet.

    For each hit, the key produced by ``_hit_key`` is checked against
    ``seen_keys``. A hit with a new key is appended to ``hit_cache`` and its
    key is added to ``seen_keys``; if the hit also has a truthy ``uuid``
    attribute, ``{"uuid": str(uuid)}`` is appended to the ``uuids_by_index``
    list for the hit's index (the list is created on first use). Hits whose
    key is already in ``seen_keys`` are ignored.
    """
    for hit in hits:
        hit_id = _hit_key(hit)
        if hit_id not in seen_keys:
            hit_cache.append(hit)
            seen_keys.add(hit_id)
            if getattr(hit, "uuid", None):
                bucket = uuids_by_index.setdefault(hit.meta.index, [])
                bucket.append({"uuid": str(hit.uuid)})
|
||||
|
||||
exact_queries_by_index: dict[str, list[Any]] = {
|
||||
"categories": [
|
||||
Q("term", **{"name.raw": {"value": query}}),
|
||||
Q("term", **{"slug": {"value": slugify(query)}}),
|
||||
],
|
||||
"brands": [
|
||||
Q("term", **{"name.raw": {"value": query}}),
|
||||
Q("term", **{"slug": {"value": slugify(query)}}),
|
||||
],
|
||||
"products": [
|
||||
Q("term", **{"name.raw": {"value": query}}),
|
||||
Q("term", **{"slug": {"value": slugify(query)}}),
|
||||
Q("term", **{"sku.raw": {"value": query.lower()}}),
|
||||
Q("term", **{"partnumber.raw": {"value": query.lower()}}),
|
||||
],
|
||||
}
|
||||
|
||||
for idx_name in ("categories", "brands", "products"):
|
||||
if idx_name in indexes:
|
||||
shoulds = exact_queries_by_index[idx_name]
|
||||
s_exact = (
|
||||
Search(index=[idx_name])
|
||||
.query(Q("bool", should=shoulds, minimum_should_match=1))
|
||||
.extra(size=5, track_total_hits=False)
|
||||
)
|
||||
try:
|
||||
resp_exact = s_exact.execute()
|
||||
except NotFoundError:
|
||||
resp_exact = None
|
||||
if resp_exact is not None and getattr(resp_exact, "hits", None):
|
||||
_collect_hits(list(resp_exact.hits))
|
||||
|
||||
for h in (
|
||||
list(resp_cats.hits[:12] if resp_cats else [])
|
||||
+ list(resp_brands.hits[:12] if resp_brands else [])
|
||||
+ list(resp_products.hits[:26] if resp_products else [])
|
||||
):
|
||||
k = _hit_key(h)
|
||||
if k in seen_keys:
|
||||
continue
|
||||
hit_cache.append(h)
|
||||
seen_keys.add(k)
|
||||
if getattr(h, "uuid", None):
|
||||
uuids_by_index.setdefault(h.meta.index, []).append({"uuid": str(h.uuid)})
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue