import asyncio import logging import re import json import os from datetime import datetime from pathlib import Path from typing import List, Dict, Any, Optional import re from playwright.async_api import async_playwright from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type # ---- Price parsing helpers ---- _PLN_PRICE_RE = re.compile( r'(? float | None: """ '1 299,00 zł' / '1299 zł' / '1 299 zł' -> 1299.00 Возвращает None, если распарсить не удалось. """ if not price_text: return None t = ( price_text .replace("\u00a0", " ") # NBSP .replace("\u2009", " ") # thin space .strip() ) m = _PLN_PRICE_RE.search(t) if not m: return None num = m.group(1) num = num.replace(" ", "").replace("\u00a0", "").replace("\u2009", "") num = num.replace(",", ".") try: return float(num) except Exception: return None class FetchError(Exception): pass class Fetcher: """ Browser layer: Playwright Chromium with anti-bot hygiene and robust debug dumps. - Blocks heavy resources (fonts/media/images), keeps stylesheets. - Waits for either SSR summary scripts or window.ssrClientSettings. - Two ways to read product summaries: 1) window.ssrClientSettings.productSummary 2) inline