Features: 1) Add suggest, price, and quantity fields to ProductDocument; 2) Introduce prepare_suggest method for enhancing autocomplete functionality; 3) Enhance search with filters, function scoring, and result aggregation; 4) Add new analyzers and token filters for synonyms and stopwords.

Fixes: 1) Remove unused imports in `core/elasticsearch/__init__.py`.

Extra: Refactor `process_query` for improved readability and functionality; update aggregation and result processing logic; reformat and clean up code.
This commit is contained in:
Egor Pavlovich Gorbunov 2025-06-20 03:55:56 +03:00
parent efd927f4d1
commit 97829d23a6
2 changed files with 129 additions and 81 deletions

View file

@ -1,12 +1,9 @@
from django.conf import settings
from django.http import Http404
from django.shortcuts import get_object_or_404
from django.utils.text import slugify
from django.utils.translation import gettext_lazy as _
from django_elasticsearch_dsl import fields
from django_elasticsearch_dsl.registries import registry
from elasticsearch import NotFoundError
from elasticsearch.dsl import Q, Search
from elasticsearch.dsl import SF, Q, Search
from rest_framework.request import Request
from core.models import Brand, Category, Product
@ -33,88 +30,120 @@ SMART_FIELDS = [
]
def process_query(
    query: str = "", request: Request | None = None, filters: dict | None = None
):
    """
    Run a lenient, typo-tolerant search across several indices.

    The text part of the query combines:

    * a fuzzy ``multi_match`` over ``SMART_FIELDS`` (spelling mistakes), and
    * a ``bool_prefix`` ``multi_match`` over the ``.auto`` sub-fields
      (autocomplete-style prefix matching).

    That text query is wrapped in a ``function_score`` query that adds boosts
    derived from ``quantity``, ``rating`` and ``created_at``. Optional
    ``filters`` are applied in filter context so they restrict the result set
    without influencing the score.

    Args:
        query: The user's search term. Leading/trailing whitespace is ignored.
        request: Optional request; when given it is used to build absolute
            image URLs for product, brand and category hits.
        filters: Optional mapping. Recognised keys are ``category`` and
            ``brand`` (matched against ``category.slug`` / ``brand.slug``)
            and ``price_min`` / ``price_max`` (inclusive range on ``price``).

    Returns:
        ``{"results": {...}, "facets": {...}}`` where ``results`` maps each
        index name to a list of ``{"uuid", "name", "slug", "image"}`` dicts
        and ``facets`` holds the category/brand term buckets and price stats.

    Raises:
        ValueError: If ``query`` is empty or contains only whitespace.
    """
    # Normalise first so that a whitespace-only term is rejected as well.
    query = (query or "").strip()
    if not query:
        raise ValueError("no search term provided.")
    filters = filters or {}

    base_q = Q(
        "bool",
        should=[
            Q(
                "multi_match",
                query=query,
                fields=SMART_FIELDS,
                fuzziness="AUTO",
                operator="and",
            ),
            Q(
                "multi_match",
                query=query,
                fields=[f for f in SMART_FIELDS if f.endswith(".auto")],
                type="bool_prefix",
            ),
        ],
        minimum_should_match=1,
    )

    functions = [
        SF("field_value_factor", field="quantity", modifier="log1p", missing=0),
        SF("field_value_factor", field="rating", modifier="sqrt", missing=0),
        # Newness boost. Decay functions are keyed by the field name, i.e.
        # {"exp": {"created_at": {"origin": ..., "scale": ...}}}.
        SF("exp", created_at={"origin": "now", "scale": "30d"}),
    ]
    # The function_score query must be the *scoring* query; placed in filter
    # context its score would be discarded and the boosts would do nothing.
    scored_q = Q(
        "function_score", query=base_q, functions=functions, boost_mode="sum"
    )

    filter_clauses = []
    if "category" in filters:
        filter_clauses.append(Q("term", category__slug=filters["category"]))
    if "brand" in filters:
        filter_clauses.append(Q("term", brand__slug=filters["brand"]))
    price_range = {}
    if "price_min" in filters:
        price_range["gte"] = filters["price_min"]
    if "price_max" in filters:
        price_range["lte"] = filters["price_max"]
    if price_range:
        filter_clauses.append(Q("range", price=price_range))

    search = Search(index=["products", "categories", "brands", "posts"]).query(
        scored_q
    )
    if filter_clauses:
        search = search.filter(*filter_clauses)
    search = (
        search.extra(size=20)
        .highlight("description", fragment_size=150)
        .extra(
            aggs={
                "by_category": {"terms": {"field": "category.keyword"}},
                "by_brand": {"terms": {"field": "brand.keyword"}},
                "price_stats": {"stats": {"field": "price"}},
            }
        )
    )
    response = search.execute()

    results: dict = {"products": [], "categories": [], "brands": [], "posts": []}
    for hit in response.hits:
        idx = hit.meta.index
        obj_uuid = getattr(hit, "uuid", None) or hit.meta.id
        obj_name = getattr(hit, "name", None) or getattr(hit, "title", None) or "N/A"

        # Prefer the indexed slug; brands and categories fall back to a slug
        # derived from the display name.
        obj_slug = getattr(hit, "slug", None) or ""
        if not obj_slug and idx in ("brands", "categories"):
            obj_slug = slugify(obj_name)

        # Image URLs need the request to become absolute; without a request
        # no database lookup is made and the image stays None.
        image_url = None
        if request:
            if idx == "products":
                prod = get_object_or_404(Product, uuid=obj_uuid)
                first = prod.images.order_by("priority").first()
                if first and first.image:
                    image_url = request.build_absolute_uri(first.image.url)
            elif idx == "brands":
                brand = get_object_or_404(Brand, uuid=obj_uuid)
                if brand.small_logo:
                    image_url = request.build_absolute_uri(brand.small_logo.url)
            elif idx == "categories":
                cat = get_object_or_404(Category, uuid=obj_uuid)
                if cat.image:
                    image_url = request.build_absolute_uri(cat.image.url)

        results[idx].append(
            {
                "uuid": str(obj_uuid),
                "name": obj_name,
                "slug": obj_slug,
                "image": image_url,
            }
        )

    aggs = response.aggregations
    facets = {
        "categories": [(b.key, b.doc_count) for b in aggs.by_category.buckets],
        "brands": [(b.key, b.doc_count) for b in aggs.by_brand.buckets],
        "price": {
            "min": aggs.price_stats.min,
            "max": aggs.price_stats.max,
            "avg": aggs.price_stats.avg,
        },
    }
    return {"results": results, "facets": facets}
LANGUAGE_ANALYZER_MAP = {
@ -163,6 +192,8 @@ COMMON_ANALYSIS = {
"encoder": "double_metaphone",
"replace": False,
},
"synonym_filter": {"type": "synonym", "synonyms_path": "analysis/synonyms.txt"},
"english_stop": {"type": "stop", "stopwords": "_english_"},
},
"analyzer": {
"autocomplete": {
@ -177,6 +208,10 @@ COMMON_ANALYSIS = {
"tokenizer": "standard",
"filter": ["lowercase", "asciifolding", "ngram_filter"],
},
"synonym_analyzer": {
"tokenizer": "standard",
"filter": ["lowercase", "asciifolding", "synonym_filter"],
},
"name_phonetic": {
"tokenizer": "standard",
"filter": ["lowercase", "asciifolding", "double_metaphone"],

View file

@ -52,6 +52,12 @@ class _BaseDoc(ActiveOnlyMixin, Document):
class ProductDocument(_BaseDoc):
suggest = fields.CompletionField(
analyzer="autocomplete",
search_analyzer="autocomplete_search",
)
price = fields.FloatField(attr="price")
quantity = fields.IntegerField(attr="quantity")
rating = fields.FloatField(attr="rating")
class Index(_BaseDoc.Index):
@ -61,6 +67,13 @@ class ProductDocument(_BaseDoc):
model = Product
fields = ["uuid"]
def prepare_suggest(self, instance):
    """
    Build the value stored in the ``suggest`` completion field.

    The suggestion inputs are the product name plus the names of its brand
    and category when those relations are set. The weight is taken from the
    product's ``quantity``.

    Args:
        instance: The object being indexed; ``name``, ``brand``, ``category``
            and ``quantity`` are read from it.

    Returns:
        A dict with ``input`` (list of non-empty suggestion strings) and
        ``weight`` (non-negative int).
    """
    candidates = [instance.name]
    # Brand and category are treated alike: either may be unset, in which
    # case it contributes no suggestion term instead of raising.
    for related in (instance.brand, instance.category):
        if related:
            candidates.append(related.name)
    terms = [term for term in candidates if term]

    # A missing quantity counts as 0, and the weight is clamped so it is
    # never negative.
    weight = max(int(instance.quantity or 0), 0)
    return {"input": terms, "weight": weight}
_add_multilang_fields(ProductDocument)
registry.register_document(ProductDocument)