Features: 1) Add brand_name and category_name fields to ProductDocument for enhanced search indexing; 2) Introduce expanded ICU-based analyzers for multi-language support, including Arabic, Indic, and CJK; 3) Enable search enhancements with revised weights in SMART_FIELDS and additional language-specific query mappings.
Fixes: 1) Correct the misspelled total_order field name in ProductDocument to total_orders; 2) Replace the outdated query_lc analyzer with icu_query for consistent Unicode handling; 3) Replace per-hit get_object_or_404 calls with batched lookups to avoid N+1 queries and improve query performance. Extra: Refactor the process_query function with a clearer structure and batch-loading logic; streamline brand and category field handling across documents; improve the reboot scripts to rebuild the search index automatically.
This commit is contained in:
parent
6e83562ee6
commit
0a375ad0d1
4 changed files with 195 additions and 104 deletions
|
|
@ -1,6 +1,5 @@
|
|||
from django.conf import settings
|
||||
from django.http import Http404
|
||||
from django.shortcuts import get_object_or_404
|
||||
from django.utils.text import slugify
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
from django_elasticsearch_dsl import fields
|
||||
|
|
@ -12,25 +11,24 @@ from rest_framework.request import Request
|
|||
from core.models import Brand, Category, Product
|
||||
|
||||
SMART_FIELDS = [
|
||||
"name^6",
|
||||
"name.ngram^5",
|
||||
"name.phonetic",
|
||||
"title^4",
|
||||
"title.ngram^3",
|
||||
"title.phonetic",
|
||||
"name^8",
|
||||
"name.ngram^6",
|
||||
"name.phonetic^3",
|
||||
"title^5",
|
||||
"title.ngram^4",
|
||||
"title.phonetic^2",
|
||||
"description^2",
|
||||
"description.ngram",
|
||||
"description.phonetic",
|
||||
"brand__name^3",
|
||||
"brand__name.ngram",
|
||||
"brand__name.auto",
|
||||
"category__name^2",
|
||||
"category__name.ngram",
|
||||
"category__name.auto",
|
||||
"brand_name^4",
|
||||
"brand_name.ngram^3",
|
||||
"brand_name.auto^3",
|
||||
"category_name^3",
|
||||
"category_name.ngram^2",
|
||||
"category_name.auto^2",
|
||||
]
|
||||
|
||||
functions = [
|
||||
# product-level boosts when searching for products
|
||||
{
|
||||
"filter": Q("term", **{"_index": "products"}),
|
||||
"field_value_factor": {
|
||||
|
|
@ -67,7 +65,6 @@ functions = [
|
|||
"missing": 0,
|
||||
},
|
||||
},
|
||||
# category-level boost when searching for categories
|
||||
{
|
||||
"filter": Q("term", **{"_index": "categories"}),
|
||||
"field_value_factor": {
|
||||
|
|
@ -77,7 +74,6 @@ functions = [
|
|||
"missing": 0,
|
||||
},
|
||||
},
|
||||
# brand-level boost when searching for brands
|
||||
{
|
||||
"filter": Q("term", **{"_index": "brands"}),
|
||||
"field_value_factor": {
|
||||
|
|
@ -91,34 +87,35 @@ functions = [
|
|||
|
||||
|
||||
def process_query(query: str = "", request: Request | None = None) -> dict[str, list[dict]] | None:
|
||||
"""
|
||||
Perform a lenient, typo‑tolerant, multi‑index search.
|
||||
|
||||
* Full‑text with fuzziness for spelling mistakes
|
||||
* `bool_prefix` for edge‑ngram autocomplete / “icontains”
|
||||
"""
|
||||
if not query:
|
||||
raise ValueError(_("no search term provided."))
|
||||
|
||||
query = query.strip()
|
||||
try:
|
||||
exact_shoulds = [
|
||||
Q("term", **{"name.raw": query}),
|
||||
Q("term", **{"slug": slugify(query)}),
|
||||
]
|
||||
|
||||
text_shoulds = [
|
||||
Q(
|
||||
"multi_match",
|
||||
query=query,
|
||||
fields=SMART_FIELDS,
|
||||
fuzziness="AUTO",
|
||||
operator="and",
|
||||
),
|
||||
Q(
|
||||
"multi_match",
|
||||
query=query,
|
||||
fields=[f for f in SMART_FIELDS if f.endswith(".auto")],
|
||||
type="bool_prefix",
|
||||
),
|
||||
]
|
||||
|
||||
query_base = Q(
|
||||
"bool",
|
||||
should=[
|
||||
Q(
|
||||
"multi_match",
|
||||
query=query,
|
||||
fields=SMART_FIELDS,
|
||||
fuzziness="AUTO",
|
||||
operator="and",
|
||||
),
|
||||
Q(
|
||||
"multi_match",
|
||||
query=query,
|
||||
fields=[f for f in SMART_FIELDS if f.endswith(".auto")],
|
||||
type="bool_prefix",
|
||||
),
|
||||
],
|
||||
should=exact_shoulds + text_shoulds,
|
||||
minimum_should_match=1,
|
||||
)
|
||||
|
||||
|
|
@ -127,39 +124,61 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
|
|||
query=query_base,
|
||||
functions=functions,
|
||||
boost_mode="multiply",
|
||||
score_mode="first",
|
||||
score_mode="sum",
|
||||
)
|
||||
|
||||
search = Search(index=["products", "categories", "brands", "posts"]).query(function_score_query).extra(size=100)
|
||||
response = search.execute()
|
||||
|
||||
# Batch-load related image data to avoid N+1 queries
|
||||
results: dict = {"products": [], "categories": [], "brands": [], "posts": []}
|
||||
uuids_by_index: dict[str, list] = {"products": [], "categories": [], "brands": []}
|
||||
hit_cache: list = []
|
||||
|
||||
for hit in response.hits:
|
||||
hit_cache.append(hit)
|
||||
if getattr(hit, "uuid", None):
|
||||
uuids_by_index.setdefault(hit.meta.index, []).append(str(hit.uuid))
|
||||
|
||||
products_by_uuid = {}
|
||||
brands_by_uuid = {}
|
||||
cats_by_uuid = {}
|
||||
|
||||
if request:
|
||||
if uuids_by_index.get("products"):
|
||||
products_by_uuid = {
|
||||
str(p.uuid): p
|
||||
for p in Product.objects.filter(uuid__in=uuids_by_index["products"])
|
||||
.select_related("brand", "category")
|
||||
.prefetch_related("images")
|
||||
}
|
||||
if uuids_by_index.get("brands"):
|
||||
brands_by_uuid = {str(b.uuid): b for b in Brand.objects.filter(uuid__in=uuids_by_index["brands"])}
|
||||
if uuids_by_index.get("categories"):
|
||||
cats_by_uuid = {str(c.uuid): c for c in Category.objects.filter(uuid__in=uuids_by_index["categories"])}
|
||||
|
||||
for hit in hit_cache:
|
||||
obj_uuid = getattr(hit, "uuid", None) or hit.meta.id
|
||||
obj_name = getattr(hit, "name", None) or getattr(hit, "title", None) or "N/A"
|
||||
obj_slug = ""
|
||||
raw_slug = getattr(hit, "slug", None)
|
||||
if raw_slug:
|
||||
obj_slug = raw_slug
|
||||
elif hit.meta.index == "brands":
|
||||
obj_slug = slugify(obj_name)
|
||||
elif hit.meta.index == "categories":
|
||||
obj_slug = slugify(f"{obj_name}")
|
||||
obj_slug = getattr(hit, "slug", "") or (
|
||||
slugify(obj_name) if hit.meta.index in {"brands", "categories"} else ""
|
||||
)
|
||||
|
||||
image_url = None
|
||||
idx = hit.meta.index
|
||||
if idx == "products" and request:
|
||||
prod = get_object_or_404(Product, uuid=obj_uuid)
|
||||
first = prod.images.order_by("priority").first()
|
||||
if first and first.image:
|
||||
image_url = request.build_absolute_uri(first.image.url)
|
||||
prod = products_by_uuid.get(str(obj_uuid))
|
||||
if prod:
|
||||
first = prod.images.order_by("priority").first()
|
||||
if first and first.image:
|
||||
image_url = request.build_absolute_uri(first.image.url)
|
||||
elif idx == "brands" and request:
|
||||
brand = get_object_or_404(Brand, uuid=obj_uuid)
|
||||
if brand.small_logo:
|
||||
brand = brands_by_uuid.get(str(obj_uuid))
|
||||
if brand and brand.small_logo:
|
||||
image_url = request.build_absolute_uri(brand.small_logo.url)
|
||||
elif idx == "categories" and request:
|
||||
cat = get_object_or_404(Category, uuid=obj_uuid)
|
||||
if cat.image:
|
||||
cat = cats_by_uuid.get(str(obj_uuid))
|
||||
if cat and cat.image:
|
||||
image_url = request.build_absolute_uri(cat.image.url)
|
||||
|
||||
hit_result = {
|
||||
|
|
@ -175,12 +194,8 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
|
|||
hit_result["total_orders_debug"] = getattr(hit, "total_orders", 0)
|
||||
hit_result["brand_priority_debug"] = getattr(hit, "brand_priority", 0)
|
||||
hit_result["category_priority_debug"] = getattr(hit, "category_priority", 0)
|
||||
if idx == "brands":
|
||||
if idx in ("brands", "categories"):
|
||||
hit_result["priority_debug"] = getattr(hit, "priority", 0)
|
||||
if idx == "categories":
|
||||
hit_result["priority_debug"] = getattr(hit, "priority", 0)
|
||||
if idx == "posts":
|
||||
pass
|
||||
|
||||
results[idx].append(hit_result)
|
||||
|
||||
|
|
@ -190,30 +205,30 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
|
|||
|
||||
|
||||
LANGUAGE_ANALYZER_MAP = {
|
||||
"ar": "arabic",
|
||||
"cs": "czech",
|
||||
"da": "danish",
|
||||
"de": "german",
|
||||
"en": "english",
|
||||
"es": "spanish",
|
||||
"fr": "french",
|
||||
"hi": "hindi",
|
||||
"it": "italian",
|
||||
"ja": "standard",
|
||||
"kk": "standard",
|
||||
"nl": "dutch",
|
||||
"pl": "standard",
|
||||
"pt": "portuguese",
|
||||
"ro": "romanian",
|
||||
|
||||
"ja": "cjk_search",
|
||||
"zh": "cjk_search",
|
||||
"ar": "arabic_search",
|
||||
"hi": "indic_search",
|
||||
|
||||
"ru": "russian",
|
||||
"zh": "standard",
|
||||
"pl": "standard",
|
||||
"kk": "standard",
|
||||
}
|
||||
|
||||
|
||||
def _lang_analyzer(lang_code: str) -> str:
|
||||
"""Return the best‑guess ES analyzer for an ISO language code."""
|
||||
base = lang_code.split("-")[0].lower()
|
||||
return LANGUAGE_ANALYZER_MAP.get(base, "standard")
|
||||
return LANGUAGE_ANALYZER_MAP.get(base, "icu_query")
|
||||
|
||||
|
||||
class ActiveOnlyMixin:
|
||||
|
|
@ -227,33 +242,87 @@ class ActiveOnlyMixin:
|
|||
|
||||
|
||||
COMMON_ANALYSIS = {
|
||||
"char_filter": {
|
||||
# ICU normalizer tidies up Unicode (compatibility forms etc.)
|
||||
"icu_nfkc_cf": {"type": "icu_normalizer", "name": "nfkc_cf"},
|
||||
},
|
||||
"filter": {
|
||||
"edge_ngram_filter": {"type": "edge_ngram", "min_gram": 1, "max_gram": 20},
|
||||
"ngram_filter": {"type": "ngram", "min_gram": 2, "max_gram": 20},
|
||||
"double_metaphone": {
|
||||
"type": "phonetic",
|
||||
"encoder": "double_metaphone",
|
||||
"replace": False,
|
||||
},
|
||||
|
||||
# CJK bigramming helps ja/zh when no language plugin is present
|
||||
"cjk_bigram": {"type": "cjk_bigram"},
|
||||
|
||||
# ICU casefolding/diacritics for *all* scripts
|
||||
"icu_folding": {"type": "icu_folding"},
|
||||
|
||||
# Phonetic encoder (mainly helpful for Latin-script languages)
|
||||
"double_metaphone": {"type": "phonetic", "encoder": "double_metaphone", "replace": False},
|
||||
|
||||
# Script-specific light normalizers
|
||||
"arabic_norm": {"type": "arabic_normalization"},
|
||||
"indic_norm": {"type": "indic_normalization"},
|
||||
},
|
||||
"analyzer": {
|
||||
# Generic query analyzer: ICU normalize+fold across scripts
|
||||
"icu_query": {
|
||||
"type": "custom",
|
||||
"char_filter": ["icu_nfkc_cf"],
|
||||
"tokenizer": "icu_tokenizer",
|
||||
"filter": ["lowercase", "icu_folding"],
|
||||
},
|
||||
|
||||
# Autocomplete (works well for all scripts thanks to icu_tokenizer)
|
||||
"autocomplete": {
|
||||
"tokenizer": "standard",
|
||||
"filter": ["lowercase", "asciifolding", "edge_ngram_filter"],
|
||||
"type": "custom",
|
||||
"char_filter": ["icu_nfkc_cf"],
|
||||
"tokenizer": "icu_tokenizer",
|
||||
"filter": ["lowercase", "icu_folding", "edge_ngram_filter"],
|
||||
},
|
||||
"autocomplete_search": {
|
||||
"tokenizer": "standard",
|
||||
"filter": ["lowercase", "asciifolding"],
|
||||
"type": "custom",
|
||||
"char_filter": ["icu_nfkc_cf"],
|
||||
"tokenizer": "icu_tokenizer",
|
||||
"filter": ["lowercase", "icu_folding"],
|
||||
},
|
||||
|
||||
# Content ngram for recall (again ICU-aware)
|
||||
"name_ngram": {
|
||||
"tokenizer": "standard",
|
||||
"filter": ["lowercase", "asciifolding", "ngram_filter"],
|
||||
"type": "custom",
|
||||
"char_filter": ["icu_nfkc_cf"],
|
||||
"tokenizer": "icu_tokenizer",
|
||||
"filter": ["lowercase", "icu_folding", "ngram_filter"],
|
||||
},
|
||||
|
||||
# Phonetic for Latin fallback
|
||||
"name_phonetic": {
|
||||
"tokenizer": "standard",
|
||||
"filter": ["lowercase", "asciifolding", "double_metaphone"],
|
||||
"type": "custom",
|
||||
"char_filter": ["icu_nfkc_cf"],
|
||||
"tokenizer": "icu_tokenizer",
|
||||
"filter": ["lowercase", "icu_folding", "double_metaphone"],
|
||||
},
|
||||
|
||||
# CJK search analyzer (no stemming; bigram + ICU)
|
||||
"cjk_search": {
|
||||
"type": "custom",
|
||||
"char_filter": ["icu_nfkc_cf"],
|
||||
"tokenizer": "icu_tokenizer",
|
||||
"filter": ["lowercase", "icu_folding", "cjk_bigram"],
|
||||
},
|
||||
|
||||
# Arabic & Indic light normalizations (no stemming; reliable & fast)
|
||||
"arabic_search": {
|
||||
"type": "custom",
|
||||
"char_filter": ["icu_nfkc_cf"],
|
||||
"tokenizer": "icu_tokenizer",
|
||||
"filter": ["lowercase", "icu_folding", "arabic_norm"],
|
||||
},
|
||||
"indic_search": {
|
||||
"type": "custom",
|
||||
"char_filter": ["icu_nfkc_cf"],
|
||||
"tokenizer": "icu_tokenizer",
|
||||
"filter": ["lowercase", "icu_folding", "indic_norm"],
|
||||
},
|
||||
"query_lc": {"tokenizer": "standard", "filter": ["lowercase", "asciifolding"]},
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -275,7 +344,7 @@ def _add_multilang_fields(cls):
|
|||
copy_to="name",
|
||||
fields={
|
||||
"raw": fields.KeywordField(ignore_above=256),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
|
||||
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
||||
},
|
||||
),
|
||||
|
|
@ -298,7 +367,7 @@ def _add_multilang_fields(cls):
|
|||
copy_to="description",
|
||||
fields={
|
||||
"raw": fields.KeywordField(ignore_above=256),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
|
||||
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
||||
},
|
||||
),
|
||||
|
|
|
|||
|
|
@ -6,13 +6,13 @@ from core.elasticsearch import COMMON_ANALYSIS, ActiveOnlyMixin, _add_multilang_
|
|||
from core.models import Brand, Category, Product
|
||||
|
||||
|
||||
class _BaseDoc(ActiveOnlyMixin, Document):
|
||||
class _BaseDoc(Document):
|
||||
name = fields.TextField(
|
||||
attr="name",
|
||||
analyzer="standard",
|
||||
fields={
|
||||
"raw": fields.KeywordField(ignore_above=256),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
|
||||
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
||||
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
|
||||
},
|
||||
|
|
@ -22,7 +22,7 @@ class _BaseDoc(ActiveOnlyMixin, Document):
|
|||
analyzer="standard",
|
||||
fields={
|
||||
"raw": fields.KeywordField(ignore_above=256),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
|
||||
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
||||
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
|
||||
},
|
||||
|
|
@ -44,9 +44,9 @@ class _BaseDoc(ActiveOnlyMixin, Document):
|
|||
return getattr(instance, "description", "") or ""
|
||||
|
||||
|
||||
class ProductDocument(_BaseDoc):
|
||||
class ProductDocument(ActiveOnlyMixin, _BaseDoc):
|
||||
rating = fields.FloatField(attr="rating")
|
||||
total_order = fields.IntegerField(attr="total_orders")
|
||||
total_orders = fields.IntegerField(attr="total_orders")
|
||||
brand_priority = fields.IntegerField(
|
||||
attr="brand.priority",
|
||||
index=True,
|
||||
|
|
@ -58,6 +58,27 @@ class ProductDocument(_BaseDoc):
|
|||
fields={"raw": fields.KeywordField()},
|
||||
)
|
||||
|
||||
brand_name = fields.TextField(
|
||||
attr="brand.name",
|
||||
analyzer="standard",
|
||||
fields={
|
||||
"raw": fields.KeywordField(ignore_above=256),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
|
||||
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
||||
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
|
||||
},
|
||||
)
|
||||
category_name = fields.TextField(
|
||||
attr="category.name",
|
||||
analyzer="standard",
|
||||
fields={
|
||||
"raw": fields.KeywordField(ignore_above=256),
|
||||
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="icu_query"),
|
||||
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
||||
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
|
||||
},
|
||||
)
|
||||
|
||||
class Index(_BaseDoc.Index):
|
||||
name = "products"
|
||||
|
||||
|
|
@ -70,7 +91,7 @@ _add_multilang_fields(ProductDocument)
|
|||
registry.register_document(ProductDocument)
|
||||
|
||||
|
||||
class CategoryDocument(_BaseDoc):
|
||||
class CategoryDocument(ActiveOnlyMixin, _BaseDoc):
|
||||
priority = fields.IntegerField(attr="priority")
|
||||
|
||||
class Index(_BaseDoc.Index):
|
||||
|
|
@ -85,26 +106,18 @@ _add_multilang_fields(CategoryDocument)
|
|||
registry.register_document(CategoryDocument)
|
||||
|
||||
|
||||
class BrandDocument(ActiveOnlyMixin, Document):
|
||||
class BrandDocument(ActiveOnlyMixin, _BaseDoc):
|
||||
priority = fields.IntegerField(attr="priority")
|
||||
|
||||
class Index:
|
||||
class Index(_BaseDoc.Index):
|
||||
name = "brands"
|
||||
settings = {
|
||||
"number_of_shards": 1,
|
||||
"number_of_replicas": 0,
|
||||
"analysis": COMMON_ANALYSIS,
|
||||
"index": {"max_ngram_diff": 18},
|
||||
}
|
||||
|
||||
class Django:
|
||||
model = Brand
|
||||
fields = ["uuid"]
|
||||
|
||||
def prepare_name(self, instance):
|
||||
return getattr(instance, "name", "") or ""
|
||||
|
||||
|
||||
_add_multilang_fields(BrandDocument)
|
||||
registry.register_document(BrandDocument)
|
||||
|
||||
|
||||
|
|
@ -114,9 +127,7 @@ class TestModelDocument(Document):
|
|||
|
||||
class Django:
|
||||
model = TestModel
|
||||
fields = [
|
||||
"title",
|
||||
]
|
||||
fields = ["title"]
|
||||
ignore_signals = True
|
||||
related_models: list = []
|
||||
auto_refresh = False
|
||||
|
|
|
|||
|
|
@ -23,6 +23,10 @@ echo "Setting default caches..."
|
|||
docker compose exec app poetry run python manage.py set_default_caches
|
||||
echo "Default caches set successfully!"
|
||||
|
||||
echo "Building search Index..."
|
||||
docker compose exec app poetry run python manage.py search_index --rebuild -f
|
||||
echo "Search Index built successfully!"
|
||||
|
||||
echo "Cleaning up unused Docker data..."
|
||||
docker system prune -f
|
||||
echo "Unused Docker data cleaned successfully!"
|
||||
|
|
|
|||
|
|
@ -42,6 +42,13 @@ if ($LASTEXITCODE -ne 0) {
|
|||
}
|
||||
Write-Host "Default caches set successfully!" -ForegroundColor Green
|
||||
|
||||
Write-Host "Building search Index..." -ForegroundColor Magenta
|
||||
docker compose exec app poetry run python manage.py search_index --rebuild -f
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
exit $LASTEXITCODE
|
||||
}
|
||||
Write-Host "Search Index built successfully!" -ForegroundColor Green
|
||||
|
||||
Write-Host "Cleaning up unused Docker data..." -ForegroundColor Magenta
|
||||
docker system prune -f
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue