diff --git a/blog/elasticsearch/documents.py b/blog/elasticsearch/documents.py
index af278a24..beb05ddb 100644
--- a/blog/elasticsearch/documents.py
+++ b/blog/elasticsearch/documents.py
@@ -11,25 +11,21 @@ class PostDocument(ActiveOnlyMixin, Document):
         analyzer="standard",
         fields={
             "raw": fields.KeywordField(ignore_above=256),
-            "ngram": fields.TextField(
-                analyzer="name_ngram", search_analyzer="query_lc"
-            ),
+            "ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
             "phonetic": fields.TextField(analyzer="name_phonetic"),
         },
     )
 
     class Index:
         name = "posts"
-        settings = {
-            "number_of_shards": 1,
-            "number_of_replicas": 0,
-            "analysis": COMMON_ANALYSIS,
-            "index": {"max_ngram_diff": 18},
-        }
+        settings = {"number_of_shards": 1, "number_of_replicas": 0,
+                    "analysis": COMMON_ANALYSIS, "index": {"max_ngram_diff": 18}}
 
     class Django:
         model = Post
         fields = ["uuid"]
 
+    def prepare_title(self, instance):
+        return getattr(instance, "title", "") or ""
 
 registry.register_document(PostDocument)
diff --git a/core/elasticsearch/__init__.py b/core/elasticsearch/__init__.py
index 84a0902d..7a176ca7 100644
--- a/core/elasticsearch/__init__.py
+++ b/core/elasticsearch/__init__.py
@@ -40,6 +40,7 @@ def process_query(query: str = ""):
     query = query.strip()
 
     try:
+        # Build the boolean query
        q = Q(
             "bool",
             should=[
@@ -53,31 +54,33 @@
                 Q(
                     "multi_match",
                     query=query,
-                    fields=[f.replace(".auto", ".auto") for f in SMART_FIELDS if ".auto" in f],
+                    fields=[f for f in SMART_FIELDS if f.endswith('.auto')],
                     type="bool_prefix",
                 ),
             ],
             minimum_should_match=1,
         )
 
+        # Execute search across multiple indices
         search = Search(index=["products", "categories", "brands", "posts"]).query(q).extra(size=100)
         response = search.execute()
 
+        # Collect results, guard against None values
         results = {"products": [], "categories": [], "brands": [], "posts": []}
 
         for hit in response.hits:
-            obj_uuid = getattr(hit, "uuid", hit.meta.id)
-            obj_name = getattr(hit, "name", "N/A")
-            obj_slug = getattr(hit, "slug", slugify(hit.name))
-            if hit.meta.index == "products":
-                results["products"].append({"uuid": obj_uuid, "name": obj_name, "slug": obj_slug})
-            elif hit.meta.index == "categories":
-                results["categories"].append({"uuid": obj_uuid, "name": obj_name, "slug": obj_slug})
-            elif hit.meta.index == "brands":
-                results["brands"].append({"uuid": obj_uuid, "name": obj_name, "slug": obj_slug})
-            elif hit.meta.index == "posts":
-                results["posts"].append({"uuid": obj_uuid, "name": obj_name, "slug": obj_slug})
+            obj_uuid = getattr(hit, "uuid", None) or hit.meta.id
+            obj_name = getattr(hit, "name", None) or "N/A"
+            # Safely generate a slug
+            obj_slug = getattr(hit, "slug", None) or slugify(obj_name)
+            idx = hit.meta.index
+            if idx in results:
+                results[idx].append({
+                    "uuid": str(obj_uuid),
+                    "name": obj_name,
+                    "slug": obj_slug,
+                })
 
         return results
     except NotFoundError:
         raise Http404
@@ -93,14 +96,14 @@
 LANGUAGE_ANALYZER_MAP = {
     "fr": "french",
     "hi": "hindi",
     "it": "italian",
-    "ja": "standard",  # Kuromoji plugin recommended for production
-    "kk": "standard",  # No built‑in Kazakh stemmer ‑ falls back to ICU/standard
+    "ja": "standard",
+    "kk": "standard",
     "nl": "dutch",
-    "pl": "standard",  # No built‑in Polish stemmer ‑ falls back to ICU/standard
+    "pl": "standard",
     "pt": "portuguese",
     "ro": "romanian",
     "ru": "russian",
-    "zh": "standard",  # smartcn / ICU plugin recommended for production
+    "zh": "standard",
 }
 
@@ -122,58 +125,34 @@ class ActiveOnlyMixin:
 
 COMMON_ANALYSIS = {
     "filter": {
-        "edge_ngram_filter": {
-            "type": "edge_ngram",
-            "min_gram": 1,
-            "max_gram": 20,
-        },
-        "ngram_filter": {
-            "type": "ngram",
-            "min_gram": 2,
-            "max_gram": 20,
-        },
-        "double_metaphone": {
-            "type": "phonetic",
-            "encoder": "double_metaphone",
-            "replace": False,
-        },
+        "edge_ngram_filter": {"type": "edge_ngram", "min_gram": 1, "max_gram": 20},
+        "ngram_filter": {"type": "ngram", "min_gram": 2, "max_gram": 20},
+        "double_metaphone": {"type": "phonetic", "encoder": "double_metaphone", "replace": False},
     },
     "analyzer": {
-        "autocomplete": {
-            "tokenizer": "standard",
-            "filter": ["lowercase", "asciifolding", "edge_ngram_filter"],
-        },
-        "autocomplete_search": {
-            "tokenizer": "standard",
-            "filter": ["lowercase", "asciifolding"],
-        },
-        "name_ngram": {
-            "tokenizer": "standard",
-            "filter": ["lowercase", "asciifolding", "ngram_filter"],
-        },
-        "name_phonetic": {
-            "tokenizer": "standard",
-            "filter": ["lowercase", "asciifolding", "double_metaphone"],
-        },
-        "query_lc": {
-            "tokenizer": "standard",
-            "filter": ["lowercase", "asciifolding"],
-        },
+        "autocomplete": {"tokenizer": "standard", "filter": ["lowercase", "asciifolding", "edge_ngram_filter"]},
+        "autocomplete_search": {"tokenizer": "standard", "filter": ["lowercase", "asciifolding"]},
+        "name_ngram": {"tokenizer": "standard", "filter": ["lowercase", "asciifolding", "ngram_filter"]},
+        "name_phonetic": {"tokenizer": "standard", "filter": ["lowercase", "asciifolding", "double_metaphone"]},
+        "query_lc": {"tokenizer": "standard", "filter": ["lowercase", "asciifolding"]},
     },
 }
 
 
 def _add_multilang_fields(cls):
+    """
+    Dynamically add multilingual name/description fields and prepare methods to guard against None.
+    """
     for code, _lang in settings.LANGUAGES:
         lc = code.replace("-", "_").lower()
-        analyzer = _lang_analyzer(code)
-
+        # name_{lc}
+        name_field = f"name_{lc}"
         setattr(
             cls,
-            f"name_{lc}",
+            name_field,
             fields.TextField(
-                attr=f"name_{lc}",
-                analyzer=analyzer,
+                attr=name_field,
+                analyzer=_lang_analyzer(code),
                 copy_to="name",
                 fields={
                     "raw": fields.KeywordField(ignore_above=256),
@@ -182,12 +161,19 @@ def _add_multilang_fields(cls):
                 },
             ),
         )
+        # prepare_name_{lc} to ensure no None values
+        def make_prepare(attr):
+            return lambda self, instance: getattr(instance, attr, "") or ""
+        setattr(cls, f"prepare_{name_field}", make_prepare(name_field))
+
+        # description_{lc}
+        desc_field = f"description_{lc}"
         setattr(
             cls,
-            f"description_{lc}",
+            desc_field,
             fields.TextField(
-                attr=f"description_{lc}",
-                analyzer=analyzer,
+                attr=desc_field,
+                analyzer=_lang_analyzer(code),
                 copy_to="description",
                 fields={
                     "raw": fields.KeywordField(ignore_above=256),
@@ -196,3 +182,4 @@ def _add_multilang_fields(cls):
                 },
             ),
         )
+        setattr(cls, f"prepare_{desc_field}", make_prepare(desc_field))
\ No newline at end of file
diff --git a/core/elasticsearch/documents.py b/core/elasticsearch/documents.py
index 1031d05a..c0d72953 100644
--- a/core/elasticsearch/documents.py
+++ b/core/elasticsearch/documents.py
@@ -7,33 +7,24 @@ from core.models import Brand, Category, Product
 
 class _BaseDoc(ActiveOnlyMixin, Document):
     name = fields.TextField(
+        attr="name",
         analyzer="standard",
         fields={
-            "raw": fields.KeywordField(ignore_above=256),
-            "ngram": fields.TextField(analyzer="name_ngram",
-                                      search_analyzer="query_lc"),
-            "phonetic": fields.TextField(analyzer="name_phonetic"),
-            "auto": fields.TextField(
-                analyzer="autocomplete",
-                search_analyzer="autocomplete_search",
-            ),
+            "raw": fields.KeywordField(ignore_above=256),
+            "ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
+            "phonetic": fields.TextField(analyzer="name_phonetic"),
+            "auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
         },
-        attr=None,
     )
-
     description = fields.TextField(
+        attr="description",
         analyzer="standard",
         fields={
-            "raw": fields.KeywordField(ignore_above=256),
-            "ngram": fields.TextField(analyzer="name_ngram",
-                                      search_analyzer="query_lc"),
-            "phonetic": fields.TextField(analyzer="name_phonetic"),
-            "auto": fields.TextField(
-                analyzer="autocomplete",
-                search_analyzer="autocomplete_search",
-            ),
+            "raw": fields.KeywordField(ignore_above=256),
+            "ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
+            "phonetic": fields.TextField(analyzer="name_phonetic"),
+            "auto": fields.TextField(analyzer="autocomplete", search_analyzer="autocomplete_search"),
         },
-        attr=None,
     )
 
     class Index:
@@ -41,11 +32,15 @@ class _BaseDoc(ActiveOnlyMixin, Document):
             "number_of_shards": 1,
             "number_of_replicas": 0,
             "analysis": COMMON_ANALYSIS,
-            "index": {
-                "max_ngram_diff": 20,
-            },
+            "index": {"max_ngram_diff": 20},
         }
 
+    def prepare_name(self, instance):
+        return getattr(instance, "name", "") or ""
+
+    def prepare_description(self, instance):
+        return getattr(instance, "description", "") or ""
+
 
 class ProductDocument(_BaseDoc):
     rating = fields.FloatField(attr="rating")
@@ -81,25 +76,22 @@ class BrandDocument(ActiveOnlyMixin, Document):
         analyzer="standard",
         fields={
             "raw": fields.KeywordField(ignore_above=256),
-            "ngram": fields.TextField(
-                analyzer="name_ngram", search_analyzer="query_lc"
-            ),
+            "ngram": fields.TextField(analyzer="name_ngram", search_analyzer="query_lc"),
             "phonetic": fields.TextField(analyzer="name_phonetic"),
         },
     )
 
     class Index:
         name = "brands"
-        settings = {
-            "number_of_shards": 1,
-            "number_of_replicas": 0,
-            "analysis": COMMON_ANALYSIS,
-            "index": {"max_ngram_diff": 18},
-        }
+        settings = {"number_of_shards": 1, "number_of_replicas": 0,
+                    "analysis": COMMON_ANALYSIS, "index": {"max_ngram_diff": 18}}
 
     class Django:
         model = Brand
         fields = ["uuid"]
 
+    def prepare_name(self, instance):
+        return getattr(instance, "name", "") or ""
 
-registry.register_document(BrandDocument)
+
+registry.register_document(BrandDocument)
\ No newline at end of file