diff --git a/core/elasticsearch/__init__.py b/core/elasticsearch/__init__.py index 473f93af..d0d0c948 100644 --- a/core/elasticsearch/__init__.py +++ b/core/elasticsearch/__init__.py @@ -1,12 +1,9 @@ from django.conf import settings -from django.http import Http404 from django.shortcuts import get_object_or_404 from django.utils.text import slugify -from django.utils.translation import gettext_lazy as _ from django_elasticsearch_dsl import fields from django_elasticsearch_dsl.registries import registry -from elasticsearch import NotFoundError -from elasticsearch.dsl import Q, Search +from elasticsearch.dsl import SF, Q, Search from rest_framework.request import Request from core.models import Brand, Category, Product @@ -33,88 +30,120 @@ SMART_FIELDS = [ ] -def process_query(query: str = "", request: Request | None = None): - """ - Perform a lenient, typo‑tolerant, multi‑index search. - - * Full‑text with fuzziness for spelling mistakes - * `bool_prefix` for edge‑ngram autocomplete / “icontains” - """ +def process_query( + query: str = "", request: Request | None = None, filters: dict | None = None +): if not query: - raise ValueError(_("no search term provided.")) + raise ValueError("no search term provided.") + filters = filters or {} + base_q = Q( + "bool", + should=[ + Q( + "multi_match", + query=query, + fields=SMART_FIELDS, + fuzziness="AUTO", + operator="and", + ), + Q( + "multi_match", + query=query, + fields=[f for f in SMART_FIELDS if f.endswith(".auto")], + type="bool_prefix", + ), + ], + minimum_should_match=1, + ) + functions = [ + SF("field_value_factor", field="quantity", modifier="log1p", missing=0), + SF("field_value_factor", field="rating", modifier="sqrt", missing=0), + SF("exp", field="created_at", origin="now", scale="30d"), # newness boost + ] + fq = [ + Q("function_score", query=base_q, functions=functions, boost_mode="sum"), + ] + if "category" in filters: + fq.append(Q("term", category__slug=filters["category"])) + if "brand" in filters: + fq.append(Q("term", brand__slug=filters["brand"])) + if "price_min" in filters or "price_max" in filters: + range_q = {} + if "price_min" in filters: + range_q["gte"] = filters["price_min"] + if "price_max" in filters: + range_q["lte"] = filters["price_max"] + fq.append(Q("range", price=range_q)) - query = query.strip() - try: - q = Q( - "bool", - should=[ - Q( - "multi_match", - query=query, - fields=SMART_FIELDS, - fuzziness="AUTO", - operator="and", - ), - Q( - "multi_match", - query=query, - fields=[f for f in SMART_FIELDS if f.endswith(".auto")], - type="bool_prefix", - ), - ], - minimum_should_match=1, + search = ( + Search(index=["products", "categories", "brands", "posts"]) + .query(base_q) + .filter(*fq) + .extra(size=20) + .highlight("description", fragment_size=150) + .extra( + aggs={ + "by_category": {"terms": {"field": "category.keyword"}}, + "by_brand": {"terms": {"field": "brand.keyword"}}, + "price_stats": {"stats": {"field": "price"}}, + } + ) + ) + response = search.execute() + + results: dict = {"products": [], "categories": [], "brands": [], "posts": []} + for hit in response.hits: + obj_uuid = getattr(hit, "uuid", None) or hit.meta.id + obj_name = getattr(hit, "name", None) or getattr(hit, "title", None) or "N/A" + obj_slug = "" + raw_slug = getattr(hit, "slug", None) + if raw_slug: + obj_slug = raw_slug + elif hit.meta.index == "brands": + obj_slug = slugify(obj_name) + elif hit.meta.index == "categories": + obj_slug = slugify(f"{obj_name}") + + image_url = None + idx = hit.meta.index + if idx == "products" and request: + prod = get_object_or_404(Product, uuid=obj_uuid) + first = prod.images.order_by("priority").first() + if first and first.image: + image_url = request.build_absolute_uri(first.image.url) + elif idx == "brands" and request: + brand = get_object_or_404(Brand, uuid=obj_uuid) + if brand.small_logo: + image_url = request.build_absolute_uri(brand.small_logo.url) + elif idx == "categories" and request: + cat = get_object_or_404(Category, uuid=obj_uuid) + if cat.image: + image_url = request.build_absolute_uri(cat.image.url) + + results[idx].append( + { + "uuid": str(obj_uuid), + "name": obj_name, + "slug": obj_slug, + "image": image_url, + } ) - search = ( - Search(index=["products", "categories", "brands", "posts"]) - .query(q) - .extra(size=100) - ) - response = search.execute() + facets = { + "categories": [ + (b.key, b.doc_count) for b in response.aggregations.by_category.buckets + ], + "brands": [ + (b.key, b.doc_count) for b in response.aggregations.by_brand.buckets + ], + "price": { + "min": response.aggregations.price_stats.min, + "max": response.aggregations.price_stats.max, + "avg": response.aggregations.price_stats.avg, + }, + } - results: dict = {"products": [], "categories": [], "brands": [], "posts": []} - for hit in response.hits: - obj_uuid = getattr(hit, "uuid", None) or hit.meta.id - obj_name = ( - getattr(hit, "name", None) or getattr(hit, "title", None) or "N/A" - ) - obj_slug = "" - raw_slug = getattr(hit, "slug", None) - if raw_slug: - obj_slug = raw_slug - elif hit.meta.index == "brands": - obj_slug = slugify(obj_name) - elif hit.meta.index == "categories": - obj_slug = slugify(f"{obj_name}") - - image_url = None - idx = hit.meta.index - if idx == "products" and request: - prod = get_object_or_404(Product, uuid=obj_uuid) - first = prod.images.order_by("priority").first() - if first and first.image: - image_url = request.build_absolute_uri(first.image.url) - elif idx == "brands" and request: - brand = get_object_or_404(Brand, uuid=obj_uuid) - if brand.small_logo: - image_url = request.build_absolute_uri(brand.small_logo.url) - elif idx == "categories" and request: - cat = get_object_or_404(Category, uuid=obj_uuid) - if cat.image: - image_url = request.build_absolute_uri(cat.image.url) - - results[idx].append( - { - "uuid": str(obj_uuid), - "name": obj_name, - "slug": obj_slug, - "image": image_url, - } - ) - - return results - except NotFoundError: - raise Http404 + return {"results": results, "facets": facets} LANGUAGE_ANALYZER_MAP = { @@ -163,6 +192,8 @@ COMMON_ANALYSIS = { "encoder": "double_metaphone", "replace": False, }, + "synonym_filter": {"type": "synonym", "synonyms_path": "analysis/synonyms.txt"}, + "english_stop": {"type": "stop", "stopwords": "_english_"}, }, "analyzer": { "autocomplete": { @@ -177,6 +208,10 @@ COMMON_ANALYSIS = { "tokenizer": "standard", "filter": ["lowercase", "asciifolding", "ngram_filter"], }, + "synonym_analyzer": { + "tokenizer": "standard", + "filter": ["lowercase", "asciifolding", "synonym_filter"], + }, "name_phonetic": { "tokenizer": "standard", "filter": ["lowercase", "asciifolding", "double_metaphone"], diff --git a/core/elasticsearch/documents.py b/core/elasticsearch/documents.py index 4a2c895e..908ea9c9 100644 --- a/core/elasticsearch/documents.py +++ b/core/elasticsearch/documents.py @@ -52,6 +52,12 @@ class _BaseDoc(ActiveOnlyMixin, Document): class ProductDocument(_BaseDoc): + suggest = fields.CompletionField( + analyzer="autocomplete", + search_analyzer="autocomplete_search", + ) + price = fields.FloatField(attr="price") + quantity = fields.IntegerField(attr="quantity") rating = fields.FloatField(attr="rating") class Index(_BaseDoc.Index): @@ -61,6 +67,13 @@ class ProductDocument(_BaseDoc): model = Product fields = ["uuid"] + def prepare_suggest(self, instance): + terms = [instance.name] + if instance.brand: + terms.append(instance.brand.name) + terms.append(instance.category.name) + return {"input": terms, "weight": int(instance.quantity)} + _add_multilang_fields(ProductDocument) registry.register_document(ProductDocument)