From 41b6c1aa073f0e78de34a941f9038480b03f171f Mon Sep 17 00:00:00 2001 From: Egor fureunoir Gorbunov Date: Fri, 20 Jun 2025 05:12:11 +0300 Subject: [PATCH] Revert "Features: 1) Add `suggest`, `price`, and `quantity` fields to `ProductDocument`; 2) Introduce `prepare_suggest` method for enhancing autocomplete functionality; 3) Enhance search with filters, function scoring, and result aggregation; 4) Add new analyzers and token filters for synonyms and stopwords." This reverts commit 97829d23a619b441a52925354d7f556c1381a8bf. --- core/elasticsearch/__init__.py | 197 +++++++++++++------------------- core/elasticsearch/documents.py | 13 --- 2 files changed, 81 insertions(+), 129 deletions(-) diff --git a/core/elasticsearch/__init__.py b/core/elasticsearch/__init__.py index d0d0c948..473f93af 100644 --- a/core/elasticsearch/__init__.py +++ b/core/elasticsearch/__init__.py @@ -1,9 +1,12 @@ from django.conf import settings +from django.http import Http404 from django.shortcuts import get_object_or_404 from django.utils.text import slugify +from django.utils.translation import gettext_lazy as _ from django_elasticsearch_dsl import fields from django_elasticsearch_dsl.registries import registry -from elasticsearch.dsl import SF, Q, Search +from elasticsearch import NotFoundError +from elasticsearch.dsl import Q, Search from rest_framework.request import Request from core.models import Brand, Category, Product @@ -30,120 +33,88 @@ SMART_FIELDS = [ ] -def process_query( - query: str = "", request: Request | None = None, filters: dict | None = None -): +def process_query(query: str = "", request: Request | None = None): + """ + Perform a lenient, typo‑tolerant, multi‑index search. + + * Full‑text with fuzziness for spelling mistakes + * `bool_prefix` for edge‑ngram autocomplete / “icontains” + """ if not query: - raise ValueError("no search term provided.") - filters = filters or {} - base_q = Q( - "bool", - should=[ - Q( - "multi_match", - query=query, - fields=SMART_FIELDS, - fuzziness="AUTO", - operator="and", - ), - Q( - "multi_match", - query=query, - fields=[f for f in SMART_FIELDS if f.endswith(".auto")], - type="bool_prefix", - ), - ], - minimum_should_match=1, - ) - functions = [ - SF("field_value_factor", field="quantity", modifier="log1p", missing=0), - SF("field_value_factor", field="rating", modifier="sqrt", missing=0), - SF("exp", field="created_at", origin="now", scale="30d"), # newness boost - ] - fq = [ - Q("function_score", query=base_q, functions=functions, boost_mode="sum"), - ] - if "category" in filters: - fq.append(Q("term", category__slug=filters["category"])) - if "brand" in filters: - fq.append(Q("term", brand__slug=filters["brand"])) - if "price_min" in filters or "price_max" in filters: - range_q = {} - if "price_min" in filters: - range_q["gte"] = filters["price_min"] - if "price_max" in filters: - range_q["lte"] = filters["price_max"] - fq.append(Q("range", price=range_q)) + raise ValueError(_("no search term provided.")) - search = ( - Search(index=["products", "categories", "brands", "posts"]) - .query(base_q) - .filter(*fq) - .extra(size=20) - .highlight("description", fragment_size=150) - .extra( - aggs={ - "by_category": {"terms": {"field": "category.keyword"}}, - "by_brand": {"terms": {"field": "brand.keyword"}}, - "price_stats": {"stats": {"field": "price"}}, - } - ) - ) - response = search.execute() - - results: dict = {"products": [], "categories": [], "brands": [], "posts": []} - for hit in response.hits: - obj_uuid = getattr(hit, "uuid", None) or hit.meta.id - obj_name = getattr(hit, "name", None) or getattr(hit, "title", None) or "N/A" - obj_slug = "" - raw_slug = getattr(hit, "slug", None) - if raw_slug: - obj_slug = raw_slug - elif hit.meta.index == "brands": - obj_slug = slugify(obj_name) - elif hit.meta.index == "categories": - obj_slug = slugify(f"{obj_name}") - - image_url = None - idx = hit.meta.index - if idx == "products" and request: - prod = get_object_or_404(Product, uuid=obj_uuid) - first = prod.images.order_by("priority").first() - if first and first.image: - image_url = request.build_absolute_uri(first.image.url) - elif idx == "brands" and request: - brand = get_object_or_404(Brand, uuid=obj_uuid) - if brand.small_logo: - image_url = request.build_absolute_uri(brand.small_logo.url) - elif idx == "categories" and request: - cat = get_object_or_404(Category, uuid=obj_uuid) - if cat.image: - image_url = request.build_absolute_uri(cat.image.url) - - results[idx].append( - { - "uuid": str(obj_uuid), - "name": obj_name, - "slug": obj_slug, - "image": image_url, - } + query = query.strip() + try: + q = Q( + "bool", + should=[ + Q( + "multi_match", + query=query, + fields=SMART_FIELDS, + fuzziness="AUTO", + operator="and", + ), + Q( + "multi_match", + query=query, + fields=[f for f in SMART_FIELDS if f.endswith(".auto")], + type="bool_prefix", + ), + ], + minimum_should_match=1, ) - facets = { - "categories": [ - (b.key, b.doc_count) for b in response.aggregations.by_category.buckets - ], - "brands": [ - (b.key, b.doc_count) for b in response.aggregations.by_brand.buckets - ], - "price": { - "min": response.aggregations.price_stats.min, - "max": response.aggregations.price_stats.max, - "avg": response.aggregations.price_stats.avg, - }, - } + search = ( + Search(index=["products", "categories", "brands", "posts"]) + .query(q) + .extra(size=100) + ) + response = search.execute() - return {"results": results, "facets": facets} + results: dict = {"products": [], "categories": [], "brands": [], "posts": []} + for hit in response.hits: + obj_uuid = getattr(hit, "uuid", None) or hit.meta.id + obj_name = ( + getattr(hit, "name", None) or getattr(hit, "title", None) or "N/A" + ) + obj_slug = "" + raw_slug = getattr(hit, "slug", None) + if raw_slug: + obj_slug = raw_slug + elif hit.meta.index == "brands": + obj_slug = slugify(obj_name) + elif hit.meta.index == "categories": + obj_slug = slugify(f"{obj_name}") + + image_url = None + idx = hit.meta.index + if idx == "products" and request: + prod = get_object_or_404(Product, uuid=obj_uuid) + first = prod.images.order_by("priority").first() + if first and first.image: + image_url = request.build_absolute_uri(first.image.url) + elif idx == "brands" and request: + brand = get_object_or_404(Brand, uuid=obj_uuid) + if brand.small_logo: + image_url = request.build_absolute_uri(brand.small_logo.url) + elif idx == "categories" and request: + cat = get_object_or_404(Category, uuid=obj_uuid) + if cat.image: + image_url = request.build_absolute_uri(cat.image.url) + + results[idx].append( + { + "uuid": str(obj_uuid), + "name": obj_name, + "slug": obj_slug, + "image": image_url, + } + ) + + return results + except NotFoundError: + raise Http404 LANGUAGE_ANALYZER_MAP = { @@ -192,8 +163,6 @@ COMMON_ANALYSIS = { "encoder": "double_metaphone", "replace": False, }, - "synonym_filter": {"type": "synonym", "synonyms_path": "analysis/synonyms.txt"}, - "english_stop": {"type": "stop", "stopwords": "_english_"}, }, "analyzer": { "autocomplete": { @@ -208,10 +177,6 @@ COMMON_ANALYSIS = { "tokenizer": "standard", "filter": ["lowercase", "asciifolding", "ngram_filter"], }, - "synonym_analyzer": { - "tokenizer": "standard", - "filter": ["lowercase", "asciifolding", "synonym_filter"], - }, "name_phonetic": { "tokenizer": "standard", "filter": ["lowercase", "asciifolding", "double_metaphone"], diff --git a/core/elasticsearch/documents.py b/core/elasticsearch/documents.py index 908ea9c9..4a2c895e 100644 --- a/core/elasticsearch/documents.py +++ b/core/elasticsearch/documents.py @@ -52,12 +52,6 @@ class _BaseDoc(ActiveOnlyMixin, Document): class ProductDocument(_BaseDoc): - suggest = fields.CompletionField( - analyzer="autocomplete", - search_analyzer="autocomplete_search", - ) - price = fields.FloatField(attr="price") - quantity = fields.IntegerField(attr="quantity") rating = fields.FloatField(attr="rating") class Index(_BaseDoc.Index): @@ -67,13 +61,6 @@ class ProductDocument(_BaseDoc): model = Product fields = ["uuid"] - def prepare_suggest(self, instance): - terms = [instance.name] - if instance.brand: - terms.append(instance.brand.name) - terms.append(instance.category.name) - return {"input": terms, "weight": int(instance.quantity)} - _add_multilang_fields(ProductDocument) registry.register_document(ProductDocument)