From 5efac0d5ff7ff90558caa5aaa827a722bfc77c5e Mon Sep 17 00:00:00 2001
From: Egor fureunoir Gorbunov
Date: Thu, 19 Jun 2025 16:31:54 +0300
Subject: [PATCH] Features: 1) Add support for `.sat` suffix fields in search query with `phrase_prefix` match; 2) Incorporate lenient fuzzy match and prefix match into a combined query logic; 3) Enhance `function_score` with additional filtering for better query relevance.

Fixes: 1) Handle missing or empty query input with a more explicit check; 2) Resolve potential response errors by catching `NotFoundError` exceptions in search execution.

Extra: Refactor code structure to improve readability and modularity, including better slug generation logic and streamlined image URL handling.
---
 core/elasticsearch/__init__.py | 198 +++++++++++++++++----------------
 1 file changed, 105 insertions(+), 93 deletions(-)

diff --git a/core/elasticsearch/__init__.py b/core/elasticsearch/__init__.py
index f12e30bb..efdc28c6 100644
--- a/core/elasticsearch/__init__.py
+++ b/core/elasticsearch/__init__.py
@@ -30,114 +30,126 @@ SMART_FIELDS = [
     "title.ngram^3",
     "title.phonetic",
     "title.auto^4",
+    "name.sat^6",
+    "title.sat^4",
 ]
 
 
 def process_query(query: str = "", request: Request | None = None):
-    """
-    Perform a lenient, typo‑tolerant, multi‑index search.
-
-    * Full‑text with fuzziness for spelling mistakes
-    * `bool_prefix` for edge‑ngram autocomplete / “icontains”
-    """
-    if not query:
+    if not (query := query.strip()):
         raise ValueError(_("no search term provided."))
 
-    query = query.strip()
-    try:
-        q = Q(
-            "bool",
-            should=[
-                Q(
-                    "multi_match",
-                    query=query,
-                    fields=SMART_FIELDS,
-                    fuzziness="AUTO",
-                    operator="and",
-                ),
-                Q(
-                    "multi_match",
-                    query=query,
-                    fields=[f for f in SMART_FIELDS if f.endswith(".auto")],
-                    type="bool_prefix",
-                ),
-            ],
-            minimum_should_match=1,
-        )
+    sat_match = Q(
+        "multi_match",
+        query=query,
+        type="phrase_prefix",
+        fields=[f for f in SMART_FIELDS if ".sat" in f],
+    )
 
-        functions = [
-            {
-                "gauss": {
-                    "sales_rank": {
-                        "origin": 100,
-                        "scale": 500,
-                        "offset": 0,
-                        "decay": 0.3,
-                    }
-                },
-                "weight": 3,
-            },
-        ]
+    fuzzy_match = Q(
+        "multi_match",
+        query=query,
+        fields=SMART_FIELDS,
+        fuzziness="AUTO",
+        operator="and",
+    )
 
-        boosted = Q(
-            "function_score",
-            query=q,
-            boost_mode="sum",
-            score_mode="sum",
-            functions=functions,
-        )
+    prefix_match = Q(
+        "multi_match",
+        query=query,
+        fields=[f for f in SMART_FIELDS if f.endswith(".auto")],
+        type="bool_prefix",
+    )
 
-        search = (
-            Search(index=["products", "categories", "brands", "posts"])
-            .query(boosted)
-            .extra(size=100)
-        )
-        response = search.execute()
+    combined = Q(
+        "bool",
+        should=[sat_match, fuzzy_match, prefix_match],
+        minimum_should_match=1,
+    )
 
-        results: dict = {"products": [], "categories": [], "brands": [], "posts": []}
-        for hit in response.hits:
-            obj_uuid = getattr(hit, "uuid", None) or hit.meta.id
-            obj_name = (
-                getattr(hit, "name", None) or getattr(hit, "title", None) or "N/A"
-            )
-            obj_slug = ""
-            raw_slug = getattr(hit, "slug", None)
-            if raw_slug:
-                obj_slug = raw_slug
-            elif hit.meta.index == "brands":
-                obj_slug = slugify(obj_name)
-            elif hit.meta.index == "categories":
-                obj_slug = slugify(f"{obj_name}")
-
-            image_url = None
-            idx = hit.meta.index
-            if idx == "products" and request:
-                prod = get_object_or_404(Product, uuid=obj_uuid)
-                first = prod.images.order_by("priority").first()
-                if first and first.image:
-                    image_url = request.build_absolute_uri(first.image.url)
-            elif idx == "brands" and request:
-                brand = get_object_or_404(Brand, uuid=obj_uuid)
-                if brand.small_logo:
-                    image_url = request.build_absolute_uri(brand.small_logo.url)
-            elif idx == "categories" and request:
-                cat = get_object_or_404(Category, uuid=obj_uuid)
-                if cat.image:
-                    image_url = request.build_absolute_uri(cat.image.url)
-
-            results[idx].append(
-                {
-                    "uuid": str(obj_uuid),
-                    "name": obj_name,
-                    "slug": obj_slug,
-                    "image": image_url,
+    functions = [
+        {
+            "filter": Q("prefix", **{"name.raw": query.lower()}),
+            "weight": 5,
+        },
+        {
+            "gauss": {
+                "sales_rank": {
+                    "origin": 100,
+                    "scale": 500,
+                    "offset": 0,
+                    "decay": 0.3,
                 }
-            )
+            },
+            "weight": 3,
+        },
+    ]
 
-        return results
+    boosted = Q(
+        "function_score",
+        query=combined,
+        boost_mode="sum",
+        score_mode="sum",
+        functions=functions,
+    )
+
+    search = (
+        Search(index=["products", "categories", "brands", "posts"])
+        .query(boosted)
+        .extra(size=100)
+    )
+
+    try:
+        response = search.execute()
     except NotFoundError:
         raise Http404
 
+    results = {"products": [], "categories": [], "brands": [], "posts": []}
+    for hit in response.hits:
+        obj_uuid = getattr(hit, "uuid", None) or hit.meta.id
+        obj_name = getattr(hit, "name", None) or getattr(hit, "title", None) or "N/A"
+        raw_slug = getattr(hit, "slug", None) or ""
+        obj_slug = (
+            raw_slug or slugify(obj_name)
+            if hit.meta.index in {"brands", "categories"}
+            else raw_slug
+        )
+        image_url = None
+        idx = hit.meta.index
+
+        if request:
+            if idx == "products":
+                prod = get_object_or_404(Product, uuid=obj_uuid)
+                first = prod.images.order_by("priority").first()
+                image_url = (
+                    request.build_absolute_uri(first.image.url)
+                    if first and first.image
+                    else None
+                )
+            if idx == "brands":
+                brand = get_object_or_404(Brand, uuid=obj_uuid)
+                image_url = (
+                    request.build_absolute_uri(brand.small_logo.url)
+                    if brand.small_logo
+                    else None
+                )
+            if idx == "categories":
+                cat = get_object_or_404(Category, uuid=obj_uuid)
+                image_url = (
+                    request.build_absolute_uri(cat.image.url) if cat.image else None
+                )
+
+        results[idx].append(
+            {
+                "uuid": str(obj_uuid),
+                "name": obj_name,
+                "slug": obj_slug,
+                "image": image_url,
+            }
+        )
+
+    return results
+
 
 LANGUAGE_ANALYZER_MAP = {
     "ar": "arabic",