schon/engine/core/feeds/base.py
Egor fureunoir Gorbunov 82f4381fcb feat(core): improve XML formatting and validation rules
- update `prettify_xml` to strip XML declaration for more flexibility
- prepend XML declaration explicitly in Google Merchant feed generation
- adjust pagination `page_size` max limit to 128 for stricter validation
2026-01-30 14:43:52 +03:00

173 lines
5.6 KiB
Python

import json
import logging
import os
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Any
from xml.dom import minidom
from xml.etree.ElementTree import Element, tostring
import yaml
from django.conf import settings
from django.db.models import QuerySet
from engine.core.models import Product
logger = logging.getLogger(__name__)
class BaseFeedGenerator(ABC):
"""
Base class for marketplace feed generators.
Each marketplace feed generator should inherit from this class and implement
the required methods for generating feed data in the appropriate format.
"""
name: str = "base"
supported_formats: tuple[str, ...] = ("xml", "json", "yaml")
default_format: str = "xml"
def __init__(self, locale: str = "en-gb"):
self.locale = locale
self.generated_at = datetime.now()
def get_products(self) -> QuerySet[Product]:
"""Get products that should be exported to marketplaces."""
return (
Product.objects.filter(
is_active=True,
export_to_marketplaces=True,
)
.select_related(
"category",
"brand",
)
.prefetch_related(
"images",
"stocks",
"attributes__attribute",
"tags",
)
)
def get_product_url(self, product: Product) -> str:
"""Generate the frontend URL for a product."""
return (
f"https://{settings.STOREFRONT_DOMAIN}/{self.locale}/product/{product.slug}"
)
def get_product_image_url(self, product: Product) -> str:
"""Get the primary image URL for a product."""
image = product.images.order_by("priority").first()
if image:
return image.image_url
return ""
def get_product_images(self, product: Product) -> list[str]:
"""Get all image URLs for a product."""
return [
img.image_url
for img in product.images.order_by("priority")
if img.image_url
]
def get_availability(self, product: Product) -> str:
"""Get availability status for a product."""
return "in stock" if product.quantity > 0 else "out of stock"
def get_currency(self) -> str:
"""Get the currency code."""
return settings.CURRENCY_CODE
def get_output_path(self, format_type: str) -> str:
"""Get the output file path for the feed."""
feeds_dir = os.path.join(settings.MEDIA_ROOT, "feeds")
os.makedirs(feeds_dir, exist_ok=True)
extension = format_type if format_type != "yaml" else "yml"
return os.path.join(feeds_dir, f"{self.name}.{extension}")
@abstractmethod
def generate_feed_data(self, products: QuerySet[Product]) -> Any:
"""
Generate the feed data structure.
This method should be implemented by each marketplace-specific generator
to create the appropriate data structure for that marketplace.
"""
raise NotImplementedError
def to_xml(self, data: Any) -> str:
"""Convert feed data to XML format."""
raise NotImplementedError(
f"{self.__class__.__name__} does not support XML format"
)
def to_json(self, data: Any) -> str:
"""Convert feed data to JSON format."""
return json.dumps(data, ensure_ascii=False, indent=2)
def to_yaml(self, data: Any) -> str:
"""Convert feed data to YAML format."""
return yaml.dump(data, allow_unicode=True, default_flow_style=False)
def generate(self, format_type: str | None = None) -> str:
"""
Generate the feed and save it to a file.
Args:
format_type: The output format (xml, json, yaml). Defaults to the generator's default.
Returns:
The path to the generated feed file.
"""
if format_type is None:
format_type = self.default_format
if format_type not in self.supported_formats:
raise ValueError(
f"Format '{format_type}' is not supported by {self.__class__.__name__}. "
f"Supported formats: {self.supported_formats}"
)
products = self.get_products()
product_count = products.count()
if product_count == 0:
logger.warning("No products to export for %s feed", self.name)
logger.info("Generating %s feed with %d products", self.name, product_count)
feed_data = self.generate_feed_data(products)
match format_type:
case "xml":
content = self.to_xml(feed_data)
case "json":
content = self.to_json(feed_data)
case "yaml" | "yml":
content = self.to_yaml(feed_data)
case _:
raise ValueError(f"Unknown format: {format_type}")
output_path = self.get_output_path(format_type)
with open(output_path, "w", encoding="utf-8") as f:
f.write(content)
logger.info("Generated %s feed at %s", self.name, output_path)
return output_path
@staticmethod
def prettify_xml(elem: Element) -> str:
"""Return a pretty-printed XML string for the Element (without XML declaration)."""
rough_string = tostring(elem, encoding="unicode")
reparsed = minidom.parseString(rough_string)
pretty = reparsed.toprettyxml(indent=" ")
# Strip the XML declaration added by toprettyxml so callers can add their own
lines = pretty.split("\n")
if lines and lines[0].startswith("<?xml"):
lines = lines[1:]
return "\n".join(lines).strip()