"""Build ``Product`` records from SSR summaries, rendered HTML and XHR JSON."""

import logging
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urljoin

from bs4 import BeautifulSoup

from models import Product

logger = logging.getLogger(__name__)

# Key names probed, in priority order, when reading loosely structured XHR JSON.
_XHR_LIST_KEYS = ("products", "items", "results", "hits")
_PRICE_KEYS = ("price", "currentPrice", "sellingPrice")
_IMAGE_LIST_KEYS = ("images", "imageList", "media")
_IMAGE_URL_KEYS = ("url", "src", "href")


def parse_products_from_ssr(summaries: List[Dict[str, Any]]) -> List[Product]:
    """Convert server-side-rendered product summaries into ``Product`` objects.

    Each summary is read for ``id``, ``title``, ``baseUrl``, ``currencyCode``
    and a nested ``colourway`` mapping (``url``, ``title``, ``price``,
    ``priceMarket``, ``color``).

    Args:
        summaries: Summary dicts; ``None`` or an empty list yields ``[]``.
            Entries that are not dicts are skipped.

    Returns:
        One ``Product`` per dict entry, in input order. ``image_urls`` and
        ``size_variants`` are always empty here.
    """
    out: List[Product] = []
    for s in summaries or []:
        # Skip malformed entries instead of crashing on ``s.get`` below.
        if not isinstance(s, dict):
            continue

        cw = s.get("colourway")
        if not isinstance(cw, dict):
            cw = {}

        base = s.get("baseUrl") or ""
        rel = cw.get("url") or ""
        # The trailing "/" makes urljoin treat ``base`` as a directory, so a
        # relative ``rel`` is appended rather than replacing the last segment.
        url = urljoin(base + "/", rel) if rel else (base or None)

        price = cw.get("price") or cw.get("priceMarket") or None

        out.append(Product(
            product_id=s.get("id") or None,
            url=url,
            name=s.get("title") or cw.get("title") or None,
            price=str(price) if price is not None else None,
            currency=s.get("currencyCode") or None,
            # Images will be built later from imageCdnUrl + productImageUrlPart.
            image_urls=[],
            color=cw.get("color") or None,
            # Usually empty for homeware; will be added later for fashion.
            size_variants=[],
        ))
    return out


def parse_products_from_dom(html: str, cfg: Dict[str, Any]) -> List[Product]:
    """Extract products from rendered HTML using CSS selectors from ``cfg``.

    Args:
        html: HTML markup, parsed with BeautifulSoup's ``lxml`` parser.
        cfg: Configuration with a ``selectors`` mapping (``product_tile``,
            ``product_link``, ``product_name``, ``product_price``) and an
            optional ``base_url`` used to absolutise root-relative links.

    Returns:
        One ``Product`` per tile that parsed successfully. Only ``product_id``,
        ``url``, ``name`` and ``price`` are populated.

    Raises:
        KeyError: If ``cfg`` lacks ``selectors`` or ``product_tile``.
    """
    soup = BeautifulSoup(html, "lxml")
    sel = cfg["selectors"]
    # Loop-invariant; tolerate a missing or ``None`` base_url.
    base_url = str(cfg.get("base_url") or "").rstrip("/")

    out: List[Product] = []
    for t in soup.select(sel["product_tile"]):
        try:
            a = t.select_one(sel["product_link"])
            name_el = t.select_one(sel["product_name"])
            price_el = t.select_one(sel["product_price"])

            url = a.get("href") if a else None
            # Only root-relative paths get the base prefix; protocol-relative
            # links ("//host/...") already carry their own host.
            if url and url.startswith("/") and not url.startswith("//"):
                url = base_url + url

            out.append(Product(
                product_id=t.get("data-style-id") or t.get("data-product-id") or None,
                url=url,
                name=name_el.get_text(strip=True) if name_el else None,
                price=price_el.get_text(strip=True) if price_el else None,
                currency=None,
                image_urls=[],
                color=None,
                size_variants=[],
            ))
        except Exception as exc:
            # Best-effort: one bad tile must not abort the page, but the
            # failure is logged so selector/config mistakes are visible.
            logger.warning("Skipping product tile that failed to parse: %r", exc)
            continue
    return out


def _extract_price(p: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
    """Return ``(price, currency)`` from the first price key holding a value."""
    currency: Optional[str] = None
    for k in _PRICE_KEYS:
        v = p.get(k)
        if isinstance(v, (int, float, str)):
            return str(v), currency
        if isinstance(v, dict):
            currency = v.get("currency") or currency
            amount = v.get("value")
            if amount is None:
                amount = v.get("amount")
            # Stop at the first usable amount, mirroring the scalar branch;
            # an empty price object falls through to the next key.
            if amount is not None and amount != "":
                return str(amount), currency
    return None, currency


def _extract_image_urls(p: Dict[str, Any]) -> List[Any]:
    """Collect image URLs from every known image-list key of a product dict."""
    images: List[Any] = []
    for k in _IMAGE_LIST_KEYS:
        v = p.get(k)
        if not isinstance(v, list):
            continue
        for it in v:
            if isinstance(it, str):
                images.append(it)
            elif isinstance(it, dict):
                for kk in _IMAGE_URL_KEYS:
                    if it.get(kk):
                        images.append(it[kk])
                        # One entry per image object: first matching key wins.
                        break
    return images


def parse_products_from_xhr(xhrs: List[Dict[str, Any]]) -> List[Product]:
    """Extract products from captured XHR responses.

    Each item is expected to carry its decoded body under ``"json"``. The body
    may be a list of product dicts, or a dict holding such a list under one of
    ``products``, ``items``, ``results`` or ``hits`` (first match wins).

    Args:
        xhrs: Captured responses; ``None`` or an empty list yields ``[]``.
            Items and candidates that are not dicts are skipped.

    Returns:
        One ``Product`` per product dict found, in encounter order.
    """
    out: List[Product] = []
    for item in xhrs or []:
        if not isinstance(item, dict):
            continue

        j = item.get("json") or {}
        candidates: List[Any] = []
        if isinstance(j, dict):
            for key in _XHR_LIST_KEYS:
                if isinstance(j.get(key), list):
                    candidates = j[key]
                    break
        elif isinstance(j, list):
            candidates = j

        for p in candidates:
            if not isinstance(p, dict):
                continue

            price, currency = _extract_price(p)
            # ``sizes`` may be absent, null or a non-list; only lists are read.
            sizes = p.get("sizes")
            size_variants = (
                [s for s in sizes if isinstance(s, str)]
                if isinstance(sizes, list)
                else []
            )

            out.append(Product(
                product_id=str(
                    p.get("id") or p.get("productId") or p.get("styleId") or ""
                ) or None,
                url=p.get("url") or p.get("link") or None,
                name=p.get("name") or p.get("productName") or None,
                price=price,
                currency=currency,
                image_urls=_extract_image_urls(p),
                color=p.get("color") or None,
                size_variants=size_variants,
            ))
    return out