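Fixes: 1) Reworked search weights so that text matches appear first: lowered the field_value_factor factors and added per-function weights, added explicit boosts to the exact name/slug term queries, and moved the function_score from the main query into a rescore phase.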

This commit is contained in:
Egor Pavlovich Gorbunov 2025-08-25 00:11:47 +03:00
parent b3a0fbef56
commit d948d51f91

View file

@ -34,54 +34,60 @@ functions = [
"field_value_factor": { "field_value_factor": {
"field": "brand_priority", "field": "brand_priority",
"modifier": "log1p", "modifier": "log1p",
"factor": 1.2, "factor": 0.2,
"missing": 0, "missing": 0,
}, },
"weight": 0.6,
}, },
{ {
"filter": Q("term", **{"_index": "products"}), "filter": Q("term", **{"_index": "products"}),
"field_value_factor": { "field_value_factor": {
"field": "rating", "field": "rating",
"modifier": "log1p", "modifier": "log1p",
"factor": 1.2, "factor": 0.15,
"missing": 0, "missing": 0,
}, },
"weight": 0.5,
}, },
{ {
"filter": Q("term", **{"_index": "products"}), "filter": Q("term", **{"_index": "products"}),
"field_value_factor": { "field_value_factor": {
"field": "total_orders", "field": "total_orders",
"modifier": "log1p", "modifier": "log1p",
"factor": 1.3, "factor": 0.25,
"missing": 0, "missing": 0,
}, },
"weight": 0.7,
}, },
{ {
"filter": Q("term", **{"_index": "products"}), "filter": Q("term", **{"_index": "products"}),
"field_value_factor": { "field_value_factor": {
"field": "category_priority", "field": "category_priority",
"modifier": "log1p", "modifier": "log1p",
"factor": 1.2, "factor": 0.2,
"missing": 0, "missing": 0,
}, },
"weight": 0.6,
}, },
{ {
"filter": Q("term", **{"_index": "categories"}), "filter": Q("term", **{"_index": "categories"}),
"field_value_factor": { "field_value_factor": {
"field": "priority", "field": "priority",
"modifier": "log1p", "modifier": "log1p",
"factor": 1.5, "factor": 0.25,
"missing": 0, "missing": 0,
}, },
"weight": 0.8,
}, },
{ {
"filter": Q("term", **{"_index": "brands"}), "filter": Q("term", **{"_index": "brands"}),
"field_value_factor": { "field_value_factor": {
"field": "priority", "field": "priority",
"modifier": "log1p", "modifier": "log1p",
"factor": 1.5, "factor": 0.25,
"missing": 0, "missing": 0,
}, },
"weight": 0.8,
}, },
] ]
@ -93,8 +99,8 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
query = query.strip() query = query.strip()
try: try:
exact_shoulds = [ exact_shoulds = [
Q("term", **{"name.raw": query}), Q("term", **{"name.raw": {"value": query, "boost": 3.0}}),
Q("term", **{"slug": slugify(query)}), Q("term", **{"slug": {"value": slugify(query), "boost": 2.0}}),
] ]
lang = "" lang = ""
@ -142,18 +148,31 @@ def process_query(query: str = "", request: Request | None = None) -> dict[str,
minimum_should_match=1, minimum_should_match=1,
) )
function_score_query = Q( search = (
"function_score", Search(index=["products", "categories", "brands", "posts"])
query=query_base, .query(query_base)
functions=functions, .extra(
boost_mode="multiply", rescore={
score_mode="sum", "window_size": 200,
"query": {
"rescore_query": Q(
"function_score",
query=Q("match_all"),
functions=functions,
boost_mode="sum",
score_mode="sum",
max_boost=2.0,
).to_dict(),
"query_weight": 1.0,
"rescore_query_weight": 1.0,
},
"track_scores": True,
}
)
.extra(size=100)
) )
search = Search(index=["products", "categories", "brands", "posts"]).query(function_score_query).extra(size=100)
response = search.execute() response = search.execute()
# Batch-load related image data to avoid N+1 queries
results: dict = {"products": [], "categories": [], "brands": [], "posts": []} results: dict = {"products": [], "categories": [], "brands": [], "posts": []}
uuids_by_index: dict[str, list] = {"products": [], "categories": [], "brands": []} uuids_by_index: dict[str, list] = {"products": [], "categories": [], "brands": []}
hit_cache: list = [] hit_cache: list = []