Features: 1) Add seen_keys mechanism to avoid duplicate hits in Elasticsearch query results; 2) Introduce _collect_hits helper function for processing and storing hits; 3) Add exact-match queries for categories, brands, and products to improve search accuracy.
Fixes: 1) Prevent duplicate entries in hit processing by checking `seen_keys`. Extra: Refactor query-building logic for consistency and readability; minor performance optimizations in query execution.
This commit is contained in:
parent
e894affad7
commit
c263182414
1 changed files with 51 additions and 0 deletions
|
|
@ -228,12 +228,63 @@ def process_query(
|
||||||
uuids_by_index: dict[str, list[dict[str, Any]]] = {"products": [], "categories": [], "brands": []}
|
uuids_by_index: dict[str, list[dict[str, Any]]] = {"products": [], "categories": [], "brands": []}
|
||||||
hit_cache: list[Any] = []
|
hit_cache: list[Any] = []
|
||||||
|
|
||||||
|
seen_keys: set[tuple[str, str]] = set()
|
||||||
|
|
||||||
|
def _hit_key(hittee: Any) -> tuple[str, str]:
|
||||||
|
return hittee.meta.index, str(getattr(hittee, "uuid", None) or hittee.meta.id)
|
||||||
|
|
||||||
|
def _collect_hits(hits: list[Any]) -> None:
|
||||||
|
for hh in hits:
|
||||||
|
key = _hit_key(hh)
|
||||||
|
if key in seen_keys:
|
||||||
|
continue
|
||||||
|
hit_cache.append(hh)
|
||||||
|
seen_keys.add(key)
|
||||||
|
if getattr(hh, "uuid", None):
|
||||||
|
uuids_by_index.setdefault(hh.meta.index, []).append({"uuid": str(hh.uuid)})
|
||||||
|
|
||||||
|
exact_queries_by_index: dict[str, list[Any]] = {
|
||||||
|
"categories": [
|
||||||
|
Q("term", **{"name.raw": {"value": query}}),
|
||||||
|
Q("term", **{"slug": {"value": slugify(query)}}),
|
||||||
|
],
|
||||||
|
"brands": [
|
||||||
|
Q("term", **{"name.raw": {"value": query}}),
|
||||||
|
Q("term", **{"slug": {"value": slugify(query)}}),
|
||||||
|
],
|
||||||
|
"products": [
|
||||||
|
Q("term", **{"name.raw": {"value": query}}),
|
||||||
|
Q("term", **{"slug": {"value": slugify(query)}}),
|
||||||
|
Q("term", **{"sku.raw": {"value": query.lower()}}),
|
||||||
|
Q("term", **{"partnumber.raw": {"value": query.lower()}}),
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
for idx_name in ("categories", "brands", "products"):
|
||||||
|
if idx_name in indexes:
|
||||||
|
shoulds = exact_queries_by_index[idx_name]
|
||||||
|
s_exact = (
|
||||||
|
Search(index=[idx_name])
|
||||||
|
.query(Q("bool", should=shoulds, minimum_should_match=1))
|
||||||
|
.extra(size=5, track_total_hits=False)
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
resp_exact = s_exact.execute()
|
||||||
|
except NotFoundError:
|
||||||
|
resp_exact = None
|
||||||
|
if resp_exact is not None and getattr(resp_exact, "hits", None):
|
||||||
|
_collect_hits(list(resp_exact.hits))
|
||||||
|
|
||||||
for h in (
|
for h in (
|
||||||
list(resp_cats.hits[:12] if resp_cats else [])
|
list(resp_cats.hits[:12] if resp_cats else [])
|
||||||
+ list(resp_brands.hits[:12] if resp_brands else [])
|
+ list(resp_brands.hits[:12] if resp_brands else [])
|
||||||
+ list(resp_products.hits[:26] if resp_products else [])
|
+ list(resp_products.hits[:26] if resp_products else [])
|
||||||
):
|
):
|
||||||
|
k = _hit_key(h)
|
||||||
|
if k in seen_keys:
|
||||||
|
continue
|
||||||
hit_cache.append(h)
|
hit_cache.append(h)
|
||||||
|
seen_keys.add(k)
|
||||||
if getattr(h, "uuid", None):
|
if getattr(h, "uuid", None):
|
||||||
uuids_by_index.setdefault(h.meta.index, []).append({"uuid": str(h.uuid)})
|
uuids_by_index.setdefault(h.meta.index, []).append({"uuid": str(h.uuid)})
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue