Features: 1) Enhance product filter to support Elasticsearch ranking with preserved order; 2) Add personal_orders_only field to ProductDocument; 3) Introduce "name_exact" analyzer for case-insensitive exact matching in Elasticsearch;

Fixes: 1) Fix Elasticsearch document mapping to include the missing "ci" subfield of the name field; 2) Make the slug field searchable by removing index=False from its KeywordField; 3) Update Elasticsearch search weights and sizes for improved relevance and pagination;

Extra: 1) Refactor product search logic to prioritize partnumber > sku > slug > name.ci; 2) Add new analyzer "name_exact" for case-insensitive exact matching; 3) Adjust stock model to allow blank system_attributes; 4) Update migration to reflect JSONField changes; 5) Adjust Elasticsearch boosting weights for better ranking; 6) Set category, brand, and product search sizes to 33 for consistency (categories and brands raised from 22, products lowered from 44); 7) Add missing personal_orders_only field to ProductDocument.
This commit is contained in:
Egor Pavlovich Gorbunov 2025-11-02 03:09:16 +03:00
parent 0cec8b0380
commit 38b22704b1
5 changed files with 89 additions and 36 deletions

View file

@ -58,32 +58,36 @@ functions = [
{ {
"filter": Q("term", **{"_index": "products"}), "filter": Q("term", **{"_index": "products"}),
"field_value_factor": { "field_value_factor": {
"field": "rating", "field": "category_priority",
"modifier": "log1p", "modifier": "log1p",
"factor": 0.10, "factor": 0.16,
"missing": 0, "missing": 0,
}, },
"weight": 0.3, "weight": 0.36,
},
{
"filter": Q("term", **{"_index": "products"}),
"field_value_factor": {
"field": "rating",
"modifier": "log1p",
"factor": 0.08,
"missing": 0,
},
"weight": 0.25,
}, },
{ {
"filter": Q("term", **{"_index": "products"}), "filter": Q("term", **{"_index": "products"}),
"field_value_factor": { "field_value_factor": {
"field": "total_orders", "field": "total_orders",
"modifier": "log1p", "modifier": "log1p",
"factor": 0.18,
"missing": 0,
},
"weight": 0.4,
},
{
"filter": Q("term", **{"_index": "products"}),
"field_value_factor": {
"field": "category_priority",
"modifier": "log1p",
"factor": 0.15, "factor": 0.15,
"missing": 0, "missing": 0,
}, },
"weight": 0.35, "weight": 0.3,
},
{
"filter": Q("bool", must=[Q("term", **{"_index": "products"}), Q("term", **{"personal_orders_only": False})]),
"weight": 0.7,
}, },
{ {
"filter": Q("term", **{"_index": "categories"}), "filter": Q("term", **{"_index": "categories"}),
@ -120,10 +124,10 @@ def process_query(
query = query.strip() query = query.strip()
try: try:
exact_shoulds = [ exact_shoulds = [
Q("term", **{"name.raw": {"value": query, "boost": 2.0}}), Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 20.0}}),
Q("term", **{"slug": {"value": slugify(query), "boost": 1.5}}), Q("term", **{"sku.raw": {"value": query.lower(), "boost": 16.0}}),
Q("term", **{"sku.raw": {"value": query.lower(), "boost": 6.0}}), Q("term", **{"slug": {"value": slugify(query), "boost": 12.0}}),
Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 7.0}}), Q("match", **{"name.ci": {"query": query, "boost": 8.0}}),
] ]
lang = "" lang = ""
@ -172,10 +176,9 @@ def process_query(
if is_code_like: if is_code_like:
text_shoulds.extend( text_shoulds.extend(
[ [
Q("term", **{"sku.raw": {"value": query.lower(), "boost": 10.0}}), Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 14.0}}),
Q("term", **{"partnumber.raw": {"value": query.lower(), "boost": 12.0}}), Q("term", **{"sku.raw": {"value": query.lower(), "boost": 12.0}}),
Q("prefix", **{"sku.raw": {"value": query.lower(), "boost": 5.0}}), Q("prefix", **{"partnumber.raw": {"value": query.lower(), "boost": 4.0}}),
Q("prefix", **{"partnumber.raw": {"value": query.lower(), "boost": 6.0}}),
] ]
) )
@ -211,17 +214,17 @@ def process_query(
resp_cats = None resp_cats = None
if "categories" in indexes: if "categories" in indexes:
search_cats = build_search(["categories"], size=22) search_cats = build_search(["categories"], size=33)
resp_cats = search_cats.execute() resp_cats = search_cats.execute()
resp_brands = None resp_brands = None
if "brands" in indexes: if "brands" in indexes:
search_brands = build_search(["brands"], size=22) search_brands = build_search(["brands"], size=33)
resp_brands = search_brands.execute() resp_brands = search_brands.execute()
resp_products = None resp_products = None
if "products" in indexes: if "products" in indexes:
search_products = build_search(["products"], size=44) search_products = build_search(["products"], size=33)
resp_products = search_products.execute() resp_products = search_products.execute()
results: dict[str, list[dict[str, Any]]] = {"products": [], "categories": [], "brands": [], "posts": []} results: dict[str, list[dict[str, Any]]] = {"products": [], "categories": [], "brands": [], "posts": []}
@ -252,15 +255,27 @@ def process_query(
Q("term", **{"name.raw": {"value": query}}), Q("term", **{"name.raw": {"value": query}}),
Q("term", **{"slug": {"value": slugify(query)}}), Q("term", **{"slug": {"value": slugify(query)}}),
], ],
"products": [
Q("term", **{"name.raw": {"value": query}}),
Q("term", **{"slug": {"value": slugify(query)}}),
Q("term", **{"sku.raw": {"value": query.lower()}}),
Q("term", **{"partnumber.raw": {"value": query.lower()}}),
],
} }
for idx_name in ("categories", "brands", "products"): # Collect exact product matches in strict priority: partnumber > sku > slug > name.ci
if "products" in indexes:
product_exact_sequence = [
Q("term", **{"partnumber.raw": {"value": query.lower()}}),
Q("term", **{"sku.raw": {"value": query.lower()}}),
Q("term", **{"slug": {"value": slugify(query)}}),
Q("match", **{"name.ci": {"query": query}}),
]
for qx in product_exact_sequence:
try:
resp_exact = (
Search(index=["products"]).query(qx).extra(size=5, track_total_hits=False).execute()
)
except NotFoundError:
resp_exact = None
if resp_exact is not None and getattr(resp_exact, "hits", None):
_collect_hits(list(resp_exact.hits))
for idx_name in ("categories", "brands"):
if idx_name in indexes: if idx_name in indexes:
shoulds = exact_queries_by_index[idx_name] shoulds = exact_queries_by_index[idx_name]
s_exact = ( s_exact = (
@ -389,6 +404,7 @@ class ActiveOnlyMixin:
COMMON_ANALYSIS = { COMMON_ANALYSIS = {
"char_filter": { "char_filter": {
"icu_nfkc_cf": {"type": "icu_normalizer", "name": "nfkc_cf"}, "icu_nfkc_cf": {"type": "icu_normalizer", "name": "nfkc_cf"},
"strip_ws_punct": {"type": "pattern_replace", "pattern": "[\\s\\p{Punct}]+", "replacement": ""},
}, },
"filter": { "filter": {
"edge_ngram_filter": {"type": "edge_ngram", "min_gram": 1, "max_gram": 20}, "edge_ngram_filter": {"type": "edge_ngram", "min_gram": 1, "max_gram": 20},
@ -433,6 +449,12 @@ COMMON_ANALYSIS = {
"tokenizer": "icu_tokenizer", "tokenizer": "icu_tokenizer",
"filter": ["lowercase", "icu_folding", "double_metaphone"], "filter": ["lowercase", "icu_folding", "double_metaphone"],
}, },
"name_exact": {
"type": "custom",
"char_filter": ["icu_nfkc_cf", "strip_ws_punct"],
"tokenizer": "keyword",
"filter": ["lowercase", "icu_folding"],
},
"cjk_search": { "cjk_search": {
"type": "custom", "type": "custom",
"char_filter": ["icu_nfkc_cf"], "char_filter": ["icu_nfkc_cf"],

View file

@ -19,6 +19,7 @@ class BaseDocument(Document): # type: ignore [misc]
"phonetic": fields.TextField(analyzer="name_phonetic"), "phonetic": fields.TextField(analyzer="name_phonetic"),
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"), "auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
"translit": fields.TextField(analyzer="translit_index", search_analyzer="translit_query"), "translit": fields.TextField(analyzer="translit_index", search_analyzer="translit_query"),
"ci": fields.TextField(analyzer="name_exact", search_analyzer="name_exact"),
}, },
) )
description = fields.TextField( description = fields.TextField(
@ -32,7 +33,7 @@ class BaseDocument(Document): # type: ignore [misc]
"translit": fields.TextField(analyzer="translit_index", search_analyzer="translit_query"), "translit": fields.TextField(analyzer="translit_index", search_analyzer="translit_query"),
}, },
) )
slug = fields.KeywordField(attr="slug", index=False) slug = fields.KeywordField(attr="slug")
class Index: class Index:
settings = { settings = {
@ -52,6 +53,7 @@ class BaseDocument(Document): # type: ignore [misc]
class ProductDocument(ActiveOnlyMixin, BaseDocument): class ProductDocument(ActiveOnlyMixin, BaseDocument):
rating = fields.FloatField(attr="rating") rating = fields.FloatField(attr="rating")
total_orders = fields.IntegerField(attr="total_orders") total_orders = fields.IntegerField(attr="total_orders")
personal_orders_only = fields.BooleanField(attr="personal_orders_only")
brand_priority = fields.IntegerField( brand_priority = fields.IntegerField(
attr="brand.priority", attr="brand.priority",
index=True, index=True,

View file

@ -135,6 +135,7 @@ class ProductFilter(FilterSet): # type: ignore [misc]
prefix: str | None = None, prefix: str | None = None,
) -> None: ) -> None:
super().__init__(data=data, queryset=queryset, request=request, prefix=prefix) super().__init__(data=data, queryset=queryset, request=request, prefix=prefix)
self._es_rank_applied: bool = False
ordering_param = self.data.get("order_by", "") ordering_param = self.data.get("order_by", "")
if ordering_param: if ordering_param:
order_fields = [field.strip("-") for field in ordering_param.split(",")] order_fields = [field.strip("-") for field in ordering_param.split(",")]
@ -164,9 +165,19 @@ class ProductFilter(FilterSet): # type: ignore [misc]
if not value: if not value:
return queryset return queryset
uuids = [product.get("uuid") for product in process_query(query=value, indexes=("products",))["products"]] # type: ignore es_products = process_query(query=value, indexes=("products",)) # type: ignore
uuids = [p.get("uuid") for p in (es_products or {}).get("products", [])][:33]
if not uuids:
return queryset.none()
return queryset.filter(uuid__in=uuids) # Preserve ES order using a CASE expression
when_statements = [When(uuid=u, then=pos) for pos, u in enumerate(uuids)]
queryset = queryset.filter(uuid__in=uuids).annotate(
es_rank=Case(*when_statements, default=Value(9999), output_field=IntegerField())
)
# Mark that ES ranking is applied, qs() will order appropriately
self._es_rank_applied = True
return queryset
def filter_include_flag(self, queryset: QuerySet[Product], name: str, value: str) -> QuerySet[Product]: def filter_include_flag(self, queryset: QuerySet[Product], name: str, value: str) -> QuerySet[Product]:
if not self.data.get("category_uuid"): if not self.data.get("category_uuid"):

View file

@ -0,0 +1,18 @@
# Generated by Django 5.2.7 on 2025-11-01 23:45
from django.db import migrations, models
class Migration(migrations.Migration):
    """Redefine ``system_attributes`` on the ``stock`` model.

    The field is altered to a ``JSONField`` declared with ``blank=True``,
    ``default=dict`` and the verbose name ``"system attributes"``. This
    migration depends on ``0051_stock_system_attributes`` of the ``core`` app.
    """

    # Must be applied after the migration named in the tuple below.
    dependencies = [("core", "0051_stock_system_attributes")]

    operations = [
        migrations.AlterField(
            model_name="stock",
            name="system_attributes",
            field=models.JSONField(
                verbose_name="system attributes",
                default=dict,
                blank=True,
            ),
        ),
    ]

View file

@ -551,7 +551,7 @@ class Stock(ExportModelOperationsMixin("stock"), NiceModel): # type: ignore [mi
verbose_name=_("digital file"), verbose_name=_("digital file"),
upload_to="downloadables/", upload_to="downloadables/",
) )
system_attributes = JSONField(default=dict, verbose_name=_("system attributes")) system_attributes = JSONField(default=dict, verbose_name=_("system attributes"), blank=True)
def __str__(self) -> str: def __str__(self) -> str:
return f"{self.vendor.name} - {self.product!s}" return f"{self.vendor.name} - {self.product!s}"