Refactor Elasticsearch documents for efficiency and clarity
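Simplified field definitions and improved code consistency by collapsing multi-line field and index-settings literals into single-line form. Added `prepare_*` methods that return an empty string when a field's value is missing or None. Applied the same None guard to the dynamically generated multilingual name/description fields, and streamlined query construction and result collection for better maintainability.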
This commit is contained in:
parent
64a2fe7726
commit
ef553a94a4
3 changed files with 75 additions and 101 deletions
|
|
@ -11,25 +11,21 @@ class PostDocument(ActiveOnlyMixin, Document):
|
||||||
analyzer="standard",
|
analyzer="standard",
|
||||||
fields={
|
fields={
|
||||||
"raw": fields.KeywordField(ignore_above=256),
|
"raw": fields.KeywordField(ignore_above=256),
|
||||||
"ngram": fields.TextField(
|
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
|
||||||
analyzer="name_ngram", search_analyzer="query_lc"
|
|
||||||
),
|
|
||||||
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
class Index:
|
class Index:
|
||||||
name = "posts"
|
name = "posts"
|
||||||
settings = {
|
settings = {"number_of_shards": 1, "number_of_replicas": 0,
|
||||||
"number_of_shards": 1,
|
"analysis": COMMON_ANALYSIS, "index": {"max_ngram_diff": 18}}
|
||||||
"number_of_replicas": 0,
|
|
||||||
"analysis": COMMON_ANALYSIS,
|
|
||||||
"index": {"max_ngram_diff": 18},
|
|
||||||
}
|
|
||||||
|
|
||||||
class Django:
|
class Django:
|
||||||
model = Post
|
model = Post
|
||||||
fields = ["uuid"]
|
fields = ["uuid"]
|
||||||
|
|
||||||
|
def prepare_title(self, instance):
|
||||||
|
return getattr(instance, "title", "") or ""
|
||||||
|
|
||||||
registry.register_document(PostDocument)
|
registry.register_document(PostDocument)
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,7 @@ def process_query(query: str = ""):
|
||||||
|
|
||||||
query = query.strip()
|
query = query.strip()
|
||||||
try:
|
try:
|
||||||
|
# Build the boolean query
|
||||||
q = Q(
|
q = Q(
|
||||||
"bool",
|
"bool",
|
||||||
should=[
|
should=[
|
||||||
|
|
@ -53,31 +54,32 @@ def process_query(query: str = ""):
|
||||||
Q(
|
Q(
|
||||||
"multi_match",
|
"multi_match",
|
||||||
query=query,
|
query=query,
|
||||||
fields=[f.replace(".auto", ".auto") for f in SMART_FIELDS if ".auto" in f],
|
fields=[f for f in SMART_FIELDS if f.endswith('.auto')],
|
||||||
type="bool_prefix",
|
type="bool_prefix",
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
minimum_should_match=1,
|
minimum_should_match=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Execute search across multiple indices
|
||||||
search = Search(index=["products", "categories", "brands", "posts"]).query(q).extra(size=100)
|
search = Search(index=["products", "categories", "brands", "posts"]).query(q).extra(size=100)
|
||||||
|
|
||||||
response = search.execute()
|
response = search.execute()
|
||||||
|
|
||||||
|
# Collect results, guard against None values
|
||||||
results = {"products": [], "categories": [], "brands": [], "posts": []}
|
results = {"products": [], "categories": [], "brands": [], "posts": []}
|
||||||
for hit in response.hits:
|
for hit in response.hits:
|
||||||
obj_uuid = getattr(hit, "uuid", hit.meta.id)
|
obj_uuid = getattr(hit, "uuid", None) or hit.meta.id
|
||||||
obj_name = getattr(hit, "name", "N/A")
|
obj_name = getattr(hit, "name", None) or "N/A"
|
||||||
obj_slug = getattr(hit, "slug", slugify(hit.name))
|
# Safely generate a slug
|
||||||
if hit.meta.index == "products":
|
obj_slug = getattr(hit, "slug", None) or slugify(obj_name)
|
||||||
results["products"].append({"uuid": obj_uuid, "name": obj_name, "slug": obj_slug})
|
|
||||||
elif hit.meta.index == "categories":
|
|
||||||
results["categories"].append({"uuid": obj_uuid, "name": obj_name, "slug": obj_slug})
|
|
||||||
elif hit.meta.index == "brands":
|
|
||||||
results["brands"].append({"uuid": obj_uuid, "name": obj_name, "slug": obj_slug})
|
|
||||||
elif hit.meta.index == "posts":
|
|
||||||
results["posts"].append({"uuid": obj_uuid, "name": obj_name, "slug": obj_slug})
|
|
||||||
|
|
||||||
|
idx = hit.meta.index
|
||||||
|
if idx in results:
|
||||||
|
results[idx].append({
|
||||||
|
"uuid": str(obj_uuid),
|
||||||
|
"name": obj_name,
|
||||||
|
"slug": obj_slug,
|
||||||
|
})
|
||||||
return results
|
return results
|
||||||
except NotFoundError:
|
except NotFoundError:
|
||||||
raise Http404
|
raise Http404
|
||||||
|
|
@ -93,14 +95,14 @@ LANGUAGE_ANALYZER_MAP = {
|
||||||
"fr": "french",
|
"fr": "french",
|
||||||
"hi": "hindi",
|
"hi": "hindi",
|
||||||
"it": "italian",
|
"it": "italian",
|
||||||
"ja": "standard", # Kuromoji plugin recommended for production
|
"ja": "standard",
|
||||||
"kk": "standard", # No built‑in Kazakh stemmer ‑ falls back to ICU/standard
|
"kk": "standard",
|
||||||
"nl": "dutch",
|
"nl": "dutch",
|
||||||
"pl": "standard", # No built‑in Polish stemmer ‑ falls back to ICU/standard
|
"pl": "standard",
|
||||||
"pt": "portuguese",
|
"pt": "portuguese",
|
||||||
"ro": "romanian",
|
"ro": "romanian",
|
||||||
"ru": "russian",
|
"ru": "russian",
|
||||||
"zh": "standard", # smartcn / ICU plugin recommended for production
|
"zh": "standard",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -122,58 +124,34 @@ class ActiveOnlyMixin:
|
||||||
|
|
||||||
COMMON_ANALYSIS = {
|
COMMON_ANALYSIS = {
|
||||||
"filter": {
|
"filter": {
|
||||||
"edge_ngram_filter": {
|
"edge_ngram_filter": {"type": "edge_ngram", "min_gram": 1, "max_gram": 20},
|
||||||
"type": "edge_ngram",
|
"ngram_filter": {"type": "ngram", "min_gram": 2, "max_gram": 20},
|
||||||
"min_gram": 1,
|
"double_metaphone": {"type": "phonetic", "encoder": "double_metaphone", "replace": False},
|
||||||
"max_gram": 20,
|
|
||||||
},
|
|
||||||
"ngram_filter": {
|
|
||||||
"type": "ngram",
|
|
||||||
"min_gram": 2,
|
|
||||||
"max_gram": 20,
|
|
||||||
},
|
|
||||||
"double_metaphone": {
|
|
||||||
"type": "phonetic",
|
|
||||||
"encoder": "double_metaphone",
|
|
||||||
"replace": False,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
"analyzer": {
|
"analyzer": {
|
||||||
"autocomplete": {
|
"autocomplete": {"tokenizer": "standard", "filter": ["lowercase", "asciifolding", "edge_ngram_filter"]},
|
||||||
"tokenizer": "standard",
|
"autocomplete_search": {"tokenizer": "standard", "filter": ["lowercase", "asciifolding"]},
|
||||||
"filter": ["lowercase", "asciifolding", "edge_ngram_filter"],
|
"name_ngram": {"tokenizer": "standard", "filter": ["lowercase", "asciifolding", "ngram_filter"]},
|
||||||
},
|
"name_phonetic": {"tokenizer": "standard", "filter": ["lowercase", "asciifolding", "double_metaphone"]},
|
||||||
"autocomplete_search": {
|
"query_lc": {"tokenizer": "standard", "filter": ["lowercase", "asciifolding"]},
|
||||||
"tokenizer": "standard",
|
|
||||||
"filter": ["lowercase", "asciifolding"],
|
|
||||||
},
|
|
||||||
"name_ngram": {
|
|
||||||
"tokenizer": "standard",
|
|
||||||
"filter": ["lowercase", "asciifolding", "ngram_filter"],
|
|
||||||
},
|
|
||||||
"name_phonetic": {
|
|
||||||
"tokenizer": "standard",
|
|
||||||
"filter": ["lowercase", "asciifolding", "double_metaphone"],
|
|
||||||
},
|
|
||||||
"query_lc": {
|
|
||||||
"tokenizer": "standard",
|
|
||||||
"filter": ["lowercase", "asciifolding"],
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _add_multilang_fields(cls):
|
def _add_multilang_fields(cls):
|
||||||
|
"""
|
||||||
|
Dynamically add multilingual name/description fields and prepare methods to guard against None.
|
||||||
|
"""
|
||||||
for code, _lang in settings.LANGUAGES:
|
for code, _lang in settings.LANGUAGES:
|
||||||
lc = code.replace("-", "_").lower()
|
lc = code.replace("-", "_").lower()
|
||||||
analyzer = _lang_analyzer(code)
|
# name_{lc}
|
||||||
|
name_field = f"name_{lc}"
|
||||||
setattr(
|
setattr(
|
||||||
cls,
|
cls,
|
||||||
f"name_{lc}",
|
name_field,
|
||||||
fields.TextField(
|
fields.TextField(
|
||||||
attr=f"name_{lc}",
|
attr=name_field,
|
||||||
analyzer=analyzer,
|
analyzer=_lang_analyzer(code),
|
||||||
copy_to="name",
|
copy_to="name",
|
||||||
fields={
|
fields={
|
||||||
"raw": fields.KeywordField(ignore_above=256),
|
"raw": fields.KeywordField(ignore_above=256),
|
||||||
|
|
@ -182,12 +160,19 @@ def _add_multilang_fields(cls):
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
# prepare_name_{lc} to ensure no None values
|
||||||
|
def make_prepare(attr):
|
||||||
|
return lambda self, instance: getattr(instance, attr, "") or ""
|
||||||
|
setattr(cls, f"prepare_{name_field}", make_prepare(name_field))
|
||||||
|
|
||||||
|
# description_{lc}
|
||||||
|
desc_field = f"description_{lc}"
|
||||||
setattr(
|
setattr(
|
||||||
cls,
|
cls,
|
||||||
f"description_{lc}",
|
desc_field,
|
||||||
fields.TextField(
|
fields.TextField(
|
||||||
attr=f"description_{lc}",
|
attr=desc_field,
|
||||||
analyzer=analyzer,
|
analyzer=_lang_analyzer(code),
|
||||||
copy_to="description",
|
copy_to="description",
|
||||||
fields={
|
fields={
|
||||||
"raw": fields.KeywordField(ignore_above=256),
|
"raw": fields.KeywordField(ignore_above=256),
|
||||||
|
|
@ -196,3 +181,4 @@ def _add_multilang_fields(cls):
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
setattr(cls, f"prepare_{desc_field}", make_prepare(desc_field))
|
||||||
|
|
@ -7,33 +7,24 @@ from core.models import Brand, Category, Product
|
||||||
|
|
||||||
class _BaseDoc(ActiveOnlyMixin, Document):
|
class _BaseDoc(ActiveOnlyMixin, Document):
|
||||||
name = fields.TextField(
|
name = fields.TextField(
|
||||||
|
attr="name",
|
||||||
analyzer="standard",
|
analyzer="standard",
|
||||||
fields={
|
fields={
|
||||||
"raw": fields.KeywordField(ignore_above=256),
|
"raw": fields.KeywordField(ignore_above=256),
|
||||||
"ngram": fields.TextField(analyzer="name_ngram",
|
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
|
||||||
search_analyzer="query_lc"),
|
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
||||||
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
|
||||||
"auto": fields.TextField(
|
|
||||||
analyzer="autocomplete",
|
|
||||||
search_analyzer="autocomplete_search",
|
|
||||||
),
|
|
||||||
},
|
},
|
||||||
attr=None,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
description = fields.TextField(
|
description = fields.TextField(
|
||||||
|
attr="description",
|
||||||
analyzer="standard",
|
analyzer="standard",
|
||||||
fields={
|
fields={
|
||||||
"raw": fields.KeywordField(ignore_above=256),
|
"raw": fields.KeywordField(ignore_above=256),
|
||||||
"ngram": fields.TextField(analyzer="name_ngram",
|
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
|
||||||
search_analyzer="query_lc"),
|
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
||||||
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
"auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
|
||||||
"auto": fields.TextField(
|
|
||||||
analyzer="autocomplete",
|
|
||||||
search_analyzer="autocomplete_search",
|
|
||||||
),
|
|
||||||
},
|
},
|
||||||
attr=None,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
class Index:
|
class Index:
|
||||||
|
|
@ -41,11 +32,15 @@ class _BaseDoc(ActiveOnlyMixin, Document):
|
||||||
"number_of_shards": 1,
|
"number_of_shards": 1,
|
||||||
"number_of_replicas": 0,
|
"number_of_replicas": 0,
|
||||||
"analysis": COMMON_ANALYSIS,
|
"analysis": COMMON_ANALYSIS,
|
||||||
"index": {
|
"index": {"max_ngram_diff": 20},
|
||||||
"max_ngram_diff": 20,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def prepare_name(self, instance):
|
||||||
|
return getattr(instance, "name", "") or ""
|
||||||
|
|
||||||
|
def prepare_description(self, instance):
|
||||||
|
return getattr(instance, "description", "") or ""
|
||||||
|
|
||||||
|
|
||||||
class ProductDocument(_BaseDoc):
|
class ProductDocument(_BaseDoc):
|
||||||
rating = fields.FloatField(attr="rating")
|
rating = fields.FloatField(attr="rating")
|
||||||
|
|
@ -81,25 +76,22 @@ class BrandDocument(ActiveOnlyMixin, Document):
|
||||||
analyzer="standard",
|
analyzer="standard",
|
||||||
fields={
|
fields={
|
||||||
"raw": fields.KeywordField(ignore_above=256),
|
"raw": fields.KeywordField(ignore_above=256),
|
||||||
"ngram": fields.TextField(
|
"ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
|
||||||
analyzer="name_ngram", search_analyzer="query_lc"
|
|
||||||
),
|
|
||||||
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
"phonetic": fields.TextField(analyzer="name_phonetic"),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
class Index:
|
class Index:
|
||||||
name = "brands"
|
name = "brands"
|
||||||
settings = {
|
settings = {"number_of_shards": 1, "number_of_replicas": 0,
|
||||||
"number_of_shards": 1,
|
"analysis": COMMON_ANALYSIS, "index": {"max_ngram_diff": 18}}
|
||||||
"number_of_replicas": 0,
|
|
||||||
"analysis": COMMON_ANALYSIS,
|
|
||||||
"index": {"max_ngram_diff": 18},
|
|
||||||
}
|
|
||||||
|
|
||||||
class Django:
|
class Django:
|
||||||
model = Brand
|
model = Brand
|
||||||
fields = ["uuid"]
|
fields = ["uuid"]
|
||||||
|
|
||||||
|
def prepare_name(self, instance):
|
||||||
|
return getattr(instance, "name", "") or ""
|
||||||
|
|
||||||
registry.register_document(BrandDocument)
|
|
||||||
|
registry.register_document(BrandDocument)
|
||||||
Loading…
Reference in a new issue