va1is 2025-08-24 15:11:06 +03:00
parent a2ea206e32
commit b3c1ee2b69
30 changed files with 1987 additions and 165 deletions

3
.gitignore vendored
View File

@ -28,6 +28,7 @@ Temporary Items
.apdisk
__pycache__
-/Parsing ZARAHOME/src/records_folder
records_folder
Ignore_Temp
/Processing/Files-todo
out

2
Parser_NEXT/.env.example Normal file
View File

@ -0,0 +1,2 @@
# PROXY=http://user:pass@host:port
# RATE_LIMIT=1.0
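Nothing in this commit appears to read these variables yet; a minimal sketch of how a startup loader could pick them up (the names come from the template above, the loader itself is assumed):

```python
import os

# Hypothetical reader for the .env.example values; not part of the committed code.
proxy = os.getenv("PROXY")                          # e.g. "http://user:pass@host:port"
rate_limit = float(os.getenv("RATE_LIMIT", "1.0"))  # requests per host per second
print(proxy, rate_limit)
```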

17
Parser_NEXT/README.md Normal file
View File

@ -0,0 +1,17 @@
# NEXT.pl Parser (Playwright, Python 3.12)
## Quick start
```bash
python -m venv .venv
source .venv/bin/activate # Windows: .venv\Scripts\activate
pip install -r requirements.txt
python -m playwright install chromium
python main.py
```
**categories.xlsx** — input format:
- The first column (A) contains the category links (no header row).
- Any other columns (B, C, …) are ignored (feel free to use them for notes).
- Empty rows and cells are skipped.
Outputs land in **records_folder/** as XLSX (+CSV/JSONL). Configure selectors/scroll in **config.yaml**.
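For reference, `load_categories()` in main.py reads only column A with pandas (`header=None`) and derives each category name from the last URL segment; a minimal sketch of that logic:

```python
from urllib.parse import urlparse
import pandas as pd

# Column A only, no header row; keep http(s) links, ignore everything else.
df = pd.read_excel("categories.xlsx", header=None)
urls = [str(v).strip() for v in df.iloc[:, 0].dropna()
        if str(v).strip().lower().startswith(("http://", "https://"))]
# Category name = last path segment, e.g. ".../bathroom-accessories" -> "bathroom-accessories"
names = [([s for s in urlparse(u).path.split("/") if s] or [urlparse(u).netloc])[-1] for u in urls]
```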

BIN
Parser_NEXT/categories.xlsx Normal file

Binary file not shown.

53
Parser_NEXT/config.yaml Normal file
View File

@ -0,0 +1,53 @@
base_url: "https://www.next.pl/en"
locale: "en-GB"
timezoneId: "Europe/Warsaw"
# While debugging it is convenient to see the browser:
headless: false
nav_timeout_ms: 60000
wait_timeout_ms: 30000
retries: 3
# The rate limit can be tuned when scaling up
rate_limit_per_host_per_sec: 1.0
scroll:
# Legacy parameters (used by the fallback auto_scroll and for pauses)
max_scrolls: 80
pause_ms_between_scrolls_min: 300
pause_ms_between_scrolls_max: 700
stop_if_no_new_items_after: 8
# New parameters for auto_scroll_until_total
hard_max_scrolls: 2500 # safety cap on the maximum number of scrolls
wait_networkidle_timeout_ms: 8000 # wait for networkidle after each scroll
selectors:
# product tiles
product_tile: '[data-testid="plp-product-grid-item"], [data-testid="product-tile"], .ProductCard, [data-qa="plp-product"]'
product_link: 'a[href*="/style/"], a[href*="/p/"], a[data-testid="productLink"]'
product_name: '[data-testid="product-name"], .productName, [itemprop="name"]'
product_price: '[data-testid="price"], [itemprop="price"], .price'
# readiness indicator
grid_ready: 'script[id^="next-product-summary-script-"], [data-testid="plp-product-grid-item"], [data-testid="product-grid"], .plpGrid, [data-qa="plp-grid"]'
# total item count in the page header (e.g. "(434)")
total_count: '#plp-seo-heading .esi-count, .esi-count'
xhr_patterns:
- "/search"
- "/api/search"
- "/plp"
- "/productsummary"
output:
folder: "records_folder"
excel_prefix: "next_dump"
csv_also: true
jsonl_also: true
debug:
dump_always: false # true — write dumps at every step
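For reference, main.py loads this file with `yaml.safe_load`, and the Fetcher reads the scroll block with plain `.get()` lookups; a short sketch of the values used above:

```python
import yaml

with open("config.yaml", "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Values consumed by auto_scroll_until_total() and the output writer.
hard_cap = cfg.get("scroll", {}).get("hard_max_scrolls", 2000)
netidle_ms = cfg.get("scroll", {}).get("wait_networkidle_timeout_ms", 8000)
out_folder = cfg["output"]["folder"]          # "records_folder"
print(hard_cap, netidle_ms, out_folder)
```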

636
Parser_NEXT/fetcher.py Normal file
View File

@ -0,0 +1,636 @@
import asyncio
import logging
import re
import json
import os
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional
from playwright.async_api import async_playwright
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
# ---- Price parsing helpers ----
_PLN_PRICE_RE = re.compile(
r'(?<!\d)(\d{1,3}(?:[ \u00A0]?\d{3})*(?:[.,]\d{2})?)(?:\s*(?:zł|PLN))',
re.IGNORECASE,
)
def parse_pln_price_to_float(price_text: str | None) -> float | None:
"""
'1 299,00 zł' / '1299 zł' / '1 299 zł' -> 1299.00
Returns None if parsing fails.
"""
if not price_text:
return None
t = (
price_text
.replace("\u00a0", " ") # NBSP
.replace("\u2009", " ") # thin space
.strip()
)
m = _PLN_PRICE_RE.search(t)
if not m:
return None
num = m.group(1)
num = num.replace(" ", "").replace("\u00a0", "").replace("\u2009", "")
num = num.replace(",", ".")
try:
return float(num)
except Exception:
return None
class FetchError(Exception):
pass
class Fetcher:
"""
Browser layer: Playwright Chromium with anti-bot hygiene and robust debug dumps.
- Blocks heavy resources (fonts/media/images), keeps stylesheets.
- Waits for either SSR summary scripts or window.ssrClientSettings.
- Two ways to read product summaries:
1) window.ssrClientSettings.productSummary
2) inline <script id="next-product-summary-script-..."> content (fallback)
- Captures XHR JSON responses by patterns.
- Dumps HTML/PNG with timestamps at key checkpoints and on failure.
"""
def __init__(self, cfg: Dict[str, Any]):
self.cfg = cfg
self.base_url = cfg.get("base_url")
self.xhr_patterns = [re.compile(p) for p in cfg.get("xhr_patterns", [])]
self.collected_xhr: List[Dict[str, Any]] = []
async def __aenter__(self):
self.playwright = await async_playwright().start()
args = ["--disable-dev-shm-usage", "--no-sandbox"]
self.browser = await self.playwright.chromium.launch(
headless=self.cfg.get("headless", True),
args=args,
devtools=not self.cfg.get("headless", True),
)
self.context = await self.browser.new_context(
locale=self.cfg.get("locale", "en-GB"),
timezone_id=self.cfg.get("timezoneId", "Europe/Warsaw"),
user_agent=(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/124.0.0.0 Safari/537.36"
),
viewport={"width": 1366, "height": 900},
)
self.page = await self.context.new_page()
# Block heavy resources; keep stylesheets.
await self.context.route("**/*", self._route)
# Listen to JSON XHRs for optional parsing.
self.page.on("response", self._on_response)
self.page.on("console", lambda msg: logging.debug(f"[page.console] {msg.type} {msg.text}"))
return self
async def __aexit__(self, exc_type, exc, tb):
await self.context.close()
await self.browser.close()
await self.playwright.stop()
async def _route(self, route, request):
"""
Block some of the heavy resources.
To debug with images visible, remove 'image' from the list.
"""
if request.resource_type in ["font", "media", "image"]:
return await route.abort()
return await route.continue_()
def _on_response(self, response):
try:
url = response.url
if any(p.search(url) for p in self.xhr_patterns):
if "application/json" in (response.headers.get("content-type", "")):
self.collected_xhr.append({"url": url, "response": response})
except Exception:
pass
async def _dump_debug(self, tag: str):
"""Save HTML and screenshot with timestamp; log absolute paths and CWD."""
try:
raw_dir = Path("out/raw_html").resolve()
raw_dir.mkdir(parents=True, exist_ok=True)
ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S_%f")
html_path = raw_dir / f"{ts}_{tag}.html"
png_path = raw_dir / f"{ts}_{tag}.png"
cwd = Path(os.getcwd()).resolve()
logging.info(f"[dump_debug] CWD={cwd} → html={html_path} png={png_path}")
try:
html = await self.page.content()
html_path.write_text(html, encoding="utf-8")
except Exception as e:
logging.warning(f"[dump_debug] writing HTML failed: {e}")
try:
await self.page.screenshot(path=str(png_path), full_page=True)
except Exception as e:
logging.warning(f"[dump_debug] screenshot failed: {e}")
logging.info(f"[dump_debug] saved OK: {html_path.name}, {png_path.name}")
except Exception as e:
logging.warning(f"[dump_debug] general fail: {e}")
async def _accept_cookies_if_any(self):
selectors = [
"#onetrust-accept-btn-handler",
"button#onetrust-accept-btn-handler",
'button:has-text("Accept all")',
'button:has-text("Accept All")',
'button[aria-label*="Accept"]',
]
for sel in selectors:
try:
el = self.page.locator(sel)
if await el.count() > 0:
await el.first.click(timeout=2000)
logging.info("Cookie banner accepted.")
break
except Exception:
pass
async def _log_plp_state(self, stage: str):
"""Log counts of SSR scripts and presence of window.ssrClientSettings."""
try:
scripts_count = await self.page.locator('script[id^="next-product-summary-script-"]').count()
except Exception:
scripts_count = -1
try:
has_window = await self.page.evaluate("""() => {
const ps = globalThis?.ssrClientSettings?.productSummary;
return !!(ps && Array.isArray(ps.itemNumbers) && ps.itemNumbers.length > 0);
}""")
except Exception:
has_window = False
logging.info(f"[{stage}] scripts: {scripts_count}, window.ps: {has_window}")
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=1, max=8),
retry=retry_if_exception_type(FetchError),
)
async def load_category(self, url: str):
"""
Navigation + robust readiness:
1) domcontentloaded
2) accept cookies
3) warm-up scroll
4) wait for <script id^="next-product-summary-script-"> (attached)
5) attempt window.ssrClientSettings (non-fatal)
Dumps at key checkpoints and on failure.
"""
try:
await self.page.goto(
url,
timeout=self.cfg.get("nav_timeout_ms", 60000),
wait_until="domcontentloaded",
)
await self._dump_debug("after_goto")
await self._accept_cookies_if_any()
await self._dump_debug("after_cookies")
await self._log_plp_state("after_accept")
# warm-up scroll to trigger scripts/lazy
for _ in range(3):
await self.page.mouse.wheel(0, 1600)
await self.page.wait_for_timeout(300)
await self._dump_debug("after_warmup")
await self._log_plp_state("after_warmup")
# wait for SSR script tags
await self.page.wait_for_selector(
'script[id^="next-product-summary-script-"]',
state="attached",
timeout=self.cfg.get("wait_timeout_ms", 30000),
)
await self._dump_debug("after_scripts_present")
# optional window readiness
try:
await self.page.wait_for_function(
"""
() => {
const ps = globalThis?.ssrClientSettings?.productSummary;
return !!(ps && Array.isArray(ps.itemNumbers) && ps.itemNumbers.length > 0);
}
""",
timeout=5000,
)
except Exception:
logging.info("window.ssrClientSettings not ready (non-fatal).")
await self._dump_debug("after_window_check")
return True
except Exception as e:
logging.error(f"load_category failed: {e}")
await self._dump_debug("fail_load_category")
raise FetchError(str(e))
# ---------- NEW: read total count and scroll until target ----------
async def read_total_from_header(self) -> Optional[int]:
"""
Tries to read category total from the header count like '(434)'.
Looks in '#plp-seo-heading .esi-count' or any '.esi-count' fallback.
"""
sels = ["#plp-seo-heading .esi-count", ".esi-count"]
for sel in sels:
try:
el = self.page.locator(sel)
if await el.count() > 0:
txt = await el.first.inner_text(timeout=1500)
digits = "".join(ch for ch in txt if ch.isdigit())
if digits:
total = int(digits)
logging.info(f"Total from header: {total}")
return total
except Exception:
continue
logging.info("Total from header: not found")
return None
async def auto_scroll_until_total(self, hard_max_scrolls: Optional[int] = None):
"""
Scrolls until we reach target total (from header), with a hard cap.
Uses networkidle + a small jiggle to retrigger lazy loading.
"""
hard_cap = hard_max_scrolls or self.cfg.get("scroll", {}).get("hard_max_scrolls", 2000)
netidle_ms = self.cfg.get("scroll", {}).get("wait_networkidle_timeout_ms", 8000)
# Combined product tile selector
sel_tiles = '[data-testid="plp-product-grid-item"], [data-testid="product-tile"], .ProductCard, [data-qa="plp-product"]'
target = await self.read_total_from_header()
last = 0
same_ticks = 0
same_limit = self.cfg.get("scroll", {}).get("stop_if_no_new_items_after", 8)
for i in range(hard_cap):
# Scroll to bottom
try:
await self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
except Exception:
pass
# Wait for network idle
try:
await self.page.wait_for_load_state("networkidle", timeout=netidle_ms)
except Exception:
# not fatal
await asyncio.sleep(0.25)
# Jiggle to retrigger observers
try:
await self.page.mouse.wheel(0, -200)
await asyncio.sleep(0.1)
await self.page.mouse.wheel(0, 1200)
except Exception:
pass
try:
seen = await self.page.locator(sel_tiles).count()
except Exception:
seen = last
if target and seen >= target:
logging.info(f"Reached target: seen {seen}/{target} (i={i})")
break
if seen <= last:
same_ticks += 1
if same_ticks >= same_limit:
logging.info(f"No growth for a while: seen={seen}, i={i}")
break
else:
same_ticks = 0
last = seen
logging.info(f"Final seen items: {last} (target={target}, cap={hard_cap})")
# ---------- existing helpers ----------
async def current_html(self) -> str:
return await self.page.content()
async def extract_xhr_json(self) -> List[Dict[str, Any]]:
results = []
for entry in self.collected_xhr:
try:
body = await entry["response"].json()
results.append({"url": entry["url"], "json": body})
except Exception:
pass
return results
async def read_ssr_product_summaries(self) -> List[Dict[str, Any]]:
"""
Returns simplified product summaries.
Path 1: window.ssrClientSettings.productSummary
Path 2: parse inline <script id="next-product-summary-script-..."> blocks
"""
# Path 1 — from window
js_window = """
() => {
const out = [];
const ps = globalThis?.ssrClientSettings?.productSummary;
if (!ps) return out;
const ids = Array.isArray(ps.itemNumbers) ? ps.itemNumbers : [];
for (const id of ids) {
const obj = ps[id];
if (!obj) continue;
const sd = obj?._STATE_?.productSummary?.summaryData;
if (!sd) continue;
const cw = Array.isArray(sd.colourways) && sd.colourways.length ? sd.colourways[0] : null;
out.push({
id: sd.id || null,
title: sd.title || null,
baseUrl: sd.baseUrl || null,
brand: sd.brand || null,
category: sd.category || null,
currencyCode: sd.currencyCode || null,
colourway: cw ? {
id: cw.id ?? null,
url: cw.url ?? null,
color: cw.c ?? null,
title: cw.t ?? null,
price: cw.p ?? null,
priceMarket: cw.mp ?? null,
selected: !!cw.s
} : null,
imageCdnUrl: sd.imageCdnUrl || null,
productImageUrlPart: sd.productImageUrlPart || null,
lgImagePath: sd.lgImagePath || null
});
}
return out;
}
"""
try:
w = await self.page.evaluate(js_window)
if isinstance(w, list) and w:
logging.info(f"SSR(window) summaries: {len(w)}")
return w
except Exception:
pass
# Path 2 — parse inline scripts
js_scripts = """
() => {
const list = Array.from(document.querySelectorAll('script[id^="next-product-summary-script-"]'));
return list.map(s => s.textContent || "");
}
"""
try:
texts = await self.page.evaluate(js_scripts)
except Exception:
return []
out: List[Dict[str, Any]] = []
# productSummary["ID"] = { ... } OR productSummary['ID'] = { ... }
assign_re = re.compile(r'productSummary\s*\[\s*([\'"])(.*?)\1\s*\]\s*=\s*\{')
for t in texts or []:
for m in assign_re.finditer(t):
start = m.end() - 1 # at '{'
depth = 0
end = None
for i in range(start, len(t)):
ch = t[i]
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
end = i + 1
break
if end is None:
continue
block = t[start:end]
try:
data = json.loads(block)
sd = (
data.get("_STATE_", {})
.get("productSummary", {})
.get("summaryData", {})
)
cws = sd.get("colourways") or []
cw = cws[0] if cws else None
out.append(
{
"id": sd.get("id"),
"title": sd.get("title"),
"baseUrl": sd.get("baseUrl"),
"brand": sd.get("brand"),
"category": sd.get("category"),
"currencyCode": sd.get("currencyCode"),
"colourway": {
"id": cw.get("id"),
"url": cw.get("url"),
"color": cw.get("c"),
"title": cw.get("t"),
"price": cw.get("p"),
"priceMarket": cw.get("mp"),
"selected": bool(cw.get("s")),
} if cw else None,
"imageCdnUrl": sd.get("imageCdnUrl"),
"productImageUrlPart": sd.get("productImageUrlPart"),
"lgImagePath": sd.get("lgImagePath"),
}
)
except Exception:
continue
return out
async def read_dom_products(self) -> List[Dict[str, Any]]:
"""
Parses product tiles from the DOM after scrolling.
Covers several variants of the NEXT PLP markup.
"""
js = r"""
() => {
const out = [];
const gridItems = document.querySelectorAll('[data-testid="plp-product-grid-item"], .ProductCard, [data-qa="plp-product"]');
const getPid = (container) => {
// Option 1: data-pid on the entrypoint
const entry = container.querySelector('[id^="plp-product-summary-entrypoint-"]');
if (entry && entry.getAttribute('data-pid')) return entry.getAttribute('data-pid');
// Option 2: id="plp-product-summary-tile-<ID>"
const tile = container.closest('[id^="plp-product-summary-tile-"]') || container.querySelector('[id^="plp-product-summary-tile-"]');
if (tile) {
const m = (tile.id || '').match(/plp-product-summary-tile-([A-Za-z0-9]+)/);
if (m) return m[1];
}
// Option 3: pull it out of an href like .../<ID>#<ID> or .../T43162
const a = container.querySelector('a[href*="/style/"], a[data-testid^="product_summary_tile_"], a[href*="/p/"]');
if (a) {
const href = a.getAttribute('href') || '';
const m2 = href.match(/([A-Z]\d{4,})/i);
if (m2) return m2[1].toUpperCase();
}
return null;
};
const getAbsUrl = (href) => {
try {
if (!href) return null;
if (/^https?:\/\//i.test(href)) return href;
const a = document.createElement('a');
a.href = href;
return a.href;
} catch { return href || null; }
};
const getTitle = (container) => {
const t1 = container.querySelector('[data-testid="product_summary_title"]');
if (t1) return (t1.getAttribute('data-label') || t1.textContent || '').trim();
const t2 = container.querySelector('[data-testid="product-name"], .productName, [itemprop="name"]');
if (t2) return (t2.textContent || '').trim();
return null;
};
const getPriceText = (container) => {
// cover several markup variants
const priceRoots = [
container.querySelector('[data-testid="price"]'),
container.querySelector('[data-testid="ProductCard-Price"]'),
container.querySelector('[itemprop="price"]'),
container.querySelector('[aria-label*="price" i]'),
container
].filter(Boolean);
for (const root of priceRoots) {
const spans = root.querySelectorAll('span, div');
for (const el of spans) {
const t = (el.textContent || '').trim();
if (!t) continue;
if (/\d/.test(t) && (t.includes('zł') || /PLN/i.test(t))) {
return t;
}
}
}
return null;
};
gridItems.forEach(container => {
// Main product link
const link = container.querySelector('a[href*="/style/"], a[data-testid^="product_summary_tile_"], a[href*="/p/"]');
const href = link ? link.getAttribute('href') : null;
const rec = {
id: getPid(container),
title: getTitle(container),
url: getAbsUrl(href),
price_text: getPriceText(container),
currency: null
};
if (rec.price_text) {
if (rec.price_text.includes('zł') || /PLN/i.test(rec.price_text)) rec.currency = 'PLN';
}
// skip empty tiles that have neither a link nor a title
if (rec.url || rec.title) out.push(rec);
});
// Remove duplicates by id|url
const seen = new Set();
const uniq = [];
for (const d of out) {
const key = `${d.id || ''}|${d.url || ''}`;
if (seen.has(key)) continue;
seen.add(key);
uniq.push(d);
}
return uniq;
}
"""
try:
data = await self.page.evaluate(js)
logging.info(f"DOM cards parsed: {len(data)}")
return data
except Exception as e:
logging.warning(f"read_dom_products failed: {e}")
return []
async def collect_products(self) -> List[Dict[str, Any]]:
"""
Unified collection: SSR (when available) + DOM.
Normalize to: id, title, url, price (float|None), currency ('PLN'|...).
"""
ssr = await self.read_ssr_product_summaries() or []
dom = await self.read_dom_products() or []
bykey: Dict[str, Dict[str, Any]] = {}
def key(d: Dict[str, Any]) -> str:
return f"{(d.get('id') or '')}|{(d.get('url') or '')}"
# 1) Skeleton from the DOM
for d in dom:
bykey[key(d)] = {
"id": d.get("id"),
"title": d.get("title"),
"url": d.get("url"),
"price_text": d.get("price_text"),
"currency": d.get("currency"),
}
# 2) Enrich from SSR (when available)
for s in ssr:
cw = (s.get("colourway") or {})
# build an absolute URL
url = None
try:
base = (s.get("baseUrl") or "").rstrip("/")
rel = (cw.get("url") or "").lstrip("/")
url = f"{base}/{rel}" if (base and rel) else None
except Exception:
pass
cand = {"id": s.get("id"), "url": url}
k = key(cand)
rec = bykey.get(k)
if rec is None:
bykey[k] = {
"id": s.get("id"),
"title": s.get("title"),
"url": url,
"price_text": cw.get("price"),
"currency": s.get("currencyCode"),
}
else:
if not rec.get("title") and s.get("title"):
rec["title"] = s["title"]
if not rec.get("price_text") and cw.get("price"):
rec["price_text"] = cw["price"]
if not rec.get("currency") and s.get("currencyCode"):
rec["currency"] = s["currencyCode"]
# 3) Final price normalization
out: List[Dict[str, Any]] = []
for v in bykey.values():
price_val = parse_pln_price_to_float(v.get("price_text"))
currency = v.get("currency")
if not currency and (v.get("price_text") or "").lower().find("zł") != -1:
currency = "PLN"
out.append({
"id": v.get("id"),
"title": v.get("title"),
"url": v.get("url"),
"price": price_val, # float или None
"currency": currency or "PLN"
})
logging.info(f"Total collected (SSR+DOM): {len(out)}")
return out
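Typical usage, mirroring the per-category loop in main.py: open the Fetcher as an async context manager, load a category, scroll until the header total is reached, then collect SSR+DOM products. A condensed sketch (the category URL is just the demo link from load_categories):

```python
import asyncio
import yaml
from fetcher import Fetcher

async def demo():
    with open("config.yaml", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    async with Fetcher(cfg) as fetcher:
        await fetcher.load_category("https://www.next.pl/en/shop/home/bathroom/bathroom-accessories")
        await fetcher.auto_scroll_until_total()
        products = await fetcher.collect_products()  # dicts: id, title, url, price, currency
        print(f"collected {len(products)} products")

asyncio.run(demo())
```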

193
Parser_NEXT/main.py Normal file
View File

@ -0,0 +1,193 @@
import asyncio
import logging
from pathlib import Path
from typing import List, Tuple
from datetime import timedelta
import pandas as pd
import yaml
from fetcher import Fetcher, FetchError
from sink import write_outputs
from models import Product
# ---------- config / logging ----------
def setup_logging():
Path("out/logs").mkdir(parents=True, exist_ok=True)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s %(message)s",
handlers=[
logging.FileHandler("out/logs/run.log", encoding="utf-8"),
logging.StreamHandler(),
],
)
def load_config() -> dict:
with open("config.yaml", "r", encoding="utf-8") as f:
return yaml.safe_load(f)
# ---------- load categories from the first column (A) ----------
def load_categories() -> List[Tuple[str, str]]:
"""
Reads categories.xlsx without a header row.
Takes only the first column (A) with the category links.
Derives the category name from the last URL segment.
"""
from urllib.parse import urlparse
xlsx = Path("categories.xlsx")
if not xlsx.exists():
# demo fallback if the file has not been created
return [
("bathroom-accessories", "https://www.next.pl/en/shop/home/bathroom/bathroom-accessories"),
]
df = pd.read_excel(xlsx, header=None)
if df.shape[1] == 0:
return []
urls: List[str] = []
for val in df.iloc[:, 0].tolist():
if isinstance(val, str):
u = val.strip()
elif pd.notna(val):
u = str(val).strip()
else:
continue
if not u or not u.lower().startswith(("http://", "https://")):
continue
urls.append(u)
def name_from_url(u: str) -> str:
p = urlparse(u)
parts = [s for s in p.path.split("/") if s]
return parts[-1] if parts else p.netloc
return [(name_from_url(u), u) for u in urls]
# ---------- adapter: dict -> Product ----------
def normalize_to_models(collected: List[dict]) -> List[Product]:
out: List[Product] = []
for d in collected:
pid = d.get("id")
url = d.get("url")
title = d.get("title")
price_val = d.get("price") # float | None
currency = (d.get("currency") or "PLN").upper()
price_str = None
if price_val is not None:
try:
price_str = f"{float(price_val):.2f}"
except Exception:
price_str = None
out.append(Product(
product_id=str(pid) if pid is not None else None,
url=str(url) if url else None,
name=title,
price=price_str,
currency=currency,
image_urls=[],
color=None,
size_variants=[]
))
return out
# ---------- main flow ----------
async def run_category(fetcher: Fetcher, cfg: dict, name: str, url: str):
logging.info(f"Category start: {name}{url}")
try:
await fetcher.load_category(url)
# scroll until the full count (read from the "(N)" header) is reached
await fetcher.auto_scroll_until_total()
# collect products (SSR + DOM)
collected = await fetcher.collect_products()
products = normalize_to_models(collected)
# save to xlsx/csv/jsonl
path, n, _rows = write_outputs(
category_name=name,
category_url=url,
products=products,
out_folder=cfg["output"]["folder"],
excel_prefix=cfg["output"]["excel_prefix"],
csv_also=cfg["output"].get("csv_also", True),
jsonl_also=cfg["output"].get("jsonl_also", True),
)
logging.info(f"{name}: {n} товаров → {path}")
except FetchError as e:
logging.error(f"Category failed: {name}{e}")
except Exception as e:
logging.exception(f"Category crashed: {name}{e}")
async def main_async():
setup_logging()
cfg = load_config()
categories = load_categories()
if not categories:
logging.warning("categories.xlsx пуст — добавьте ссылки в первую колонку (без заголовков).")
return
# Accumulator for the combined XLSX
master_rows: List[dict] = []
# Combined file name: all_YYYYMMDD_HHMMSS_UTC+3.xlsx
now_utc = pd.Timestamp.utcnow().to_pydatetime()
ts_utc_plus3 = (now_utc + timedelta(hours=3)).strftime("%Y%m%d_%H%M%S")
all_filename = f"all_{ts_utc_plus3}_UTC+3.xlsx"
all_path = str(Path(cfg["output"]["folder"]) / all_filename)
async with Fetcher(cfg) as fetcher:
for name, url in categories:
# regular pass over the category
try:
logging.info(f"Category start: {name} → {url}")
await fetcher.load_category(url)
await fetcher.auto_scroll_until_total()
collected = await fetcher.collect_products()
products = normalize_to_models(collected)
# per-category write
path, n, rows = write_outputs(
category_name=name,
category_url=url,
products=products,
out_folder=cfg["output"]["folder"],
excel_prefix=cfg["output"]["excel_prefix"],
csv_also=cfg["output"].get("csv_also", True),
jsonl_also=cfg["output"].get("jsonl_also", True),
)
logging.info(f"{name}: {n} товаров → {path}")
# накапливаем в общий список
master_rows.extend(rows)
except FetchError as e:
logging.error(f"Category failed: {name} → {e}")
except Exception as e:
logging.exception(f"Category crashed: {name} → {e}")
# After all categories are done, write the combined XLSX
from sink import write_master_excel
all_written_path, total = write_master_excel(all_path, master_rows)
logging.info(f"◎ ALL: {total} товаров → {all_written_path}")
def main():
asyncio.run(main_async())
if __name__ == "__main__":
main()

23
Parser_NEXT/models.py Normal file
View File

@ -0,0 +1,23 @@
from pydantic import BaseModel, Field, HttpUrl
from typing import Optional, List
class Product(BaseModel):
product_id: Optional[str] = Field(default=None)
url: Optional[HttpUrl] = None
name: Optional[str] = None
price: Optional[str] = None
currency: Optional[str] = None
image_urls: List[str] = []
color: Optional[str] = None
size_variants: List[str] = []
class RowOut(BaseModel):
category_name: str
category_url: str
product_id: Optional[str]
url: Optional[str]
name: Optional[str]
price: Optional[str]
currency: Optional[str]
color: Optional[str]
images_joined: Optional[str]
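Product is what the fetch/parse layer produces; RowOut is the flat Excel row that sink.build_rows derives from it. A small illustration of the mapping (all values are made up):

```python
from models import Product, RowOut

p = Product(product_id="T43162", url="https://www.next.pl/en/style/st123",
            name="Sample Product A", price="99.00", currency="PLN")
row = RowOut(category_name="bathroom-accessories",
             category_url="https://www.next.pl/en/shop/home/bathroom/bathroom-accessories",
             product_id=p.product_id, url=str(p.url), name=p.name,
             price=p.price, currency=p.currency, color=p.color,
             images_joined="\n".join(p.image_urls) or None)
print(row.model_dump())
```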

115
Parser_NEXT/parser.py Normal file
View File

@ -0,0 +1,115 @@
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from typing import List, Dict, Any
from models import Product
def parse_products_from_ssr(summaries: List[Dict[str, Any]]) -> List[Product]:
out: List[Product] = []
for s in summaries or []:
cw = (s.get("colourway") or {}) if isinstance(s, dict) else {}
base = s.get("baseUrl") or ""
rel = cw.get("url") or ""
url = urljoin(base + "/", rel) if rel else (base or None)
name = s.get("title") or cw.get("title") or None
price = cw.get("price") or cw.get("priceMarket") or None
color = cw.get("color") or None
currency = s.get("currencyCode") or None
out.append(Product(
product_id = s.get("id") or None,
url = url,
name = name,
price = str(price) if price is not None else None,
currency = currency,
image_urls = [], # images will be built later from imageCdnUrl + productImageUrlPart
color = color,
size_variants = [] # usually empty for homeware; to be added later for fashion
))
return out
def parse_products_from_dom(html: str, cfg: Dict[str, Any]) -> List[Product]:
soup = BeautifulSoup(html, "lxml")
sel = cfg["selectors"]
tiles = soup.select(sel["product_tile"])
out = []
for t in tiles:
try:
a = t.select_one(sel["product_link"])
name_el = t.select_one(sel["product_name"])
price_el = t.select_one(sel["product_price"])
url = a.get("href") if a else None
if url and url.startswith("/"):
url = cfg.get("base_url", "").rstrip("/") + url
name = name_el.get_text(strip=True) if name_el else None
price = price_el.get_text(strip=True) if price_el else None
pid = t.get("data-style-id") or t.get("data-product-id") or None
out.append(Product(
product_id=pid,
url=url,
name=name,
price=price,
currency=None,
image_urls=[],
color=None,
size_variants=[]
))
except Exception:
continue
return out
def parse_products_from_xhr(xhrs: List[Dict[str, Any]]) -> List[Product]:
out = []
for item in xhrs:
j = item.get("json") or {}
candidates = []
if isinstance(j, dict):
for key in ["products", "items", "results", "hits"]:
if isinstance(j.get(key), list):
candidates = j[key]
break
if not candidates and isinstance(j, list):
candidates = j
for p in candidates:
pid = str(p.get("id") or p.get("productId") or p.get("styleId") or "") or None
url = p.get("url") or p.get("link") or None
name = p.get("name") or p.get("productName") or None
price = None
currency = None
for k in ["price", "currentPrice", "sellingPrice"]:
v = p.get(k)
if isinstance(v, (int, float, str)):
price = str(v)
break
if isinstance(v, dict):
price = str(v.get("value") or v.get("amount") or "")
currency = v.get("currency") or currency
images = []
for k in ["images", "imageList", "media"]:
v = p.get(k)
if isinstance(v, list):
for it in v:
if isinstance(it, str):
images.append(it)
elif isinstance(it, dict):
for kk in ["url", "src", "href"]:
if it.get(kk):
images.append(it[kk])
out.append(Product(
product_id=pid,
url=url,
name=name,
price=price,
currency=currency,
image_urls=images,
color=p.get("color") or None,
size_variants=[s for s in p.get("sizes", []) if isinstance(s, str)]
))
return out

View File

@ -0,0 +1,8 @@
playwright==1.46.0
pandas==2.2.2
openpyxl==3.1.5
pydantic==2.8.2
pyyaml==6.0.2
tenacity==8.3.0
beautifulsoup4==4.12.3
lxml==5.2.1

108
Parser_NEXT/sink.py Normal file
View File

@ -0,0 +1,108 @@
import pandas as pd
from pathlib import Path
from typing import List, Dict, Any
from models import Product, RowOut
import hashlib, json, datetime
import re
# ---- Price parsing helpers ----
_PLN_PRICE_RE = re.compile(
r'(?<!\d)(\d{1,3}(?:[ \u00A0]?\d{3})*(?:[.,]\d{2})?)(?:\s*(?:zł|PLN))',
re.IGNORECASE,
)
def parse_pln_price_to_float(price_text: str | None) -> float | None:
"""
Extracts a float like 1299.00 from a string such as '1 299,00 zł' / '1299 zł' / '1 299 zł'.
Returns None if parsing fails.
"""
if not price_text:
return None
t = (
price_text.replace("\u00a0", " ") # NBSP
.replace("\u2009", " ") # thin space
.strip()
)
m = _PLN_PRICE_RE.search(t)
if not m:
return None
num = m.group(1)
num = num.replace(" ", "").replace("\u00a0", "").replace("\u2009", "")
num = num.replace(",", ".")
try:
return float(num)
except Exception:
return None
def _as_str(v):
return str(v) if v is not None else ""
def _key_from_fields(product_id: str | None, url: str | None) -> str:
base = f"{_as_str(product_id)}|{_as_str(url)}"
return hashlib.md5(base.encode("utf-8")).hexdigest()
def _key(p: Product) -> str:
return _key_from_fields(p.product_id, _as_str(p.url))
def build_rows(category_name: str, category_url: str, products: List[Product]) -> List[Dict[str, Any]]:
"""Построить список строк RowOut (dict) из продуктов."""
rows: List[Dict[str, Any]] = []
seen: set[str] = set()
for p in products:
k = _key(p)
if k in seen:
continue
seen.add(k)
rows.append(RowOut(
category_name=category_name,
category_url=category_url,
product_id=_as_str(p.product_id) or None,
url=_as_str(p.url) or None,
name=p.name,
price=p.price,
currency=p.currency,
color=p.color,
images_joined="\n".join(p.image_urls) if p.image_urls else None
).model_dump())
return rows
def write_outputs(category_name: str, category_url: str, products: List[Product], out_folder: str, excel_prefix: str, csv_also: bool, jsonl_also: bool):
"""Запись percategory файлов (xlsx + опционально csv/jsonl). Возвращает (excel_path, nrows, rows)."""
Path(out_folder).mkdir(parents=True, exist_ok=True)
rows = build_rows(category_name, category_url, products)
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
excel_path = Path(out_folder) / f"{excel_prefix}_{ts}.xlsx"
df = pd.DataFrame(rows)
with pd.ExcelWriter(excel_path, engine="openpyxl") as w:
df.to_excel(w, sheet_name="Products", index=False)
if csv_also:
df.to_csv(Path(out_folder) / f"{excel_prefix}_{ts}.csv", index=False)
if jsonl_also:
with open(Path(out_folder) / f"{excel_prefix}_{ts}.jsonl", "w", encoding="utf-8") as f:
for r in rows:
f.write(json.dumps(r, ensure_ascii=False) + "\n")
return str(excel_path), len(rows), rows
def write_master_excel(all_path: str, rows: List[Dict[str, Any]]):
"""Записать общий XLSX (один лист AllProducts). Перезаписывает файл целиком один раз в конце."""
Path(all_path).parent.mkdir(parents=True, exist_ok=True)
if not rows:
# nothing to write
return str(all_path), 0
# de-duplicate just in case (by product_id|url)
seen: set[str] = set()
deduped: List[Dict[str, Any]] = []
for r in rows:
k = _key_from_fields(r.get("product_id"), r.get("url"))
if k in seen:
continue
seen.add(k)
deduped.append(r)
df = pd.DataFrame(deduped)
with pd.ExcelWriter(all_path, engine="openpyxl") as w:
df.to_excel(w, sheet_name="AllProducts", index=False)
return str(all_path), len(deduped)
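Call pattern, as used in main.py: write_outputs() returns the per-category Excel path, the row count and the rows themselves, which are then accumulated and flushed once through write_master_excel(). A compact sketch (paths and names are illustrative):

```python
from models import Product
from sink import write_outputs, write_master_excel

products = [Product(product_id="T43162", name="Sample Product A",
                    price="99.00", currency="PLN")]
path, n, rows = write_outputs(
    category_name="bathroom-accessories",
    category_url="https://www.next.pl/en/shop/home/bathroom/bathroom-accessories",
    products=products, out_folder="records_folder",
    excel_prefix="next_dump", csv_also=True, jsonl_also=True,
)
all_path, total = write_master_excel("records_folder/all_demo_UTC+3.xlsx", rows)
print(path, n, all_path, total)
```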

View File

@ -0,0 +1,9 @@
<!doctype html><html><body>
<div data-testid="product-grid">
<div data-testid="product-tile" data-product-id="123">
<a href="/en/style/st123" data-testid="productLink">Open</a>
<div data-testid="product-name">Sample Product A</div>
<div data-testid="price">PLN 99</div>
</div>
</div>
</body></html>

View File

@ -0,0 +1,9 @@
from parser import parse_products_from_dom
from pathlib import Path
import yaml
def test_dom_parse_basic():
html = Path("tests/fixtures/category_sample.html").read_text(encoding="utf-8")
cfg = yaml.safe_load(Path("config.yaml").read_text(encoding="utf-8"))
lst = parse_products_from_dom(html, cfg)
assert isinstance(lst, list)

19
Parser_NEXT/utils.py Normal file
View File

@ -0,0 +1,19 @@
import logging, time, random
from pathlib import Path
def setup_logger():
Path("out/logs").mkdir(parents=True, exist_ok=True)
logging.basicConfig(
filename="out/logs/run.log",
level=logging.INFO,
format="%(asctime)s %(levelname)s %(message)s"
)
console = logging.StreamHandler()
console.setLevel(logging.INFO)
formatter = logging.Formatter("%(levelname)s %(message)s")
console.setFormatter(formatter)
logging.getLogger().addHandler(console)
def jitter(min_ms: int, max_ms: int):
t = random.randint(min_ms, max_ms) / 1000.0
time.sleep(t)

Binary file not shown.

View File

@ -28,7 +28,7 @@ def is_temp_or_hidden(name: str) -> bool:
# === Paths ===
script_dir = get_script_dir()
-folder_path = os.path.join(script_dir, 'Files-todo')
folder_path = os.path.join(script_dir, 'Files-todo/manifest')
# result file name: All-todo-YYYYMMDD-HHMM.xlsx
timestamp = datetime.now().strftime('%Y%m%d-%H%M')

0
python3.13 Normal file
View File

View File

@ -0,0 +1,12 @@
"Wymiary" : "Размеры",
"Szerokość" : "Ширина",
"Głębokość" : "Глубина",
"Obciążenie półki" : "Максимальная нагрузка на полку",
"Opakowanie" : "Упаковка",
"Wysokość" : "Высота",
"Numer artykułu" : "Артикул",
"Długość" : "Длинна",
"Waga" : "Вес",
"Paczka(i)" : "Упаковок",
"Pojemność" : "Объем",
"Ilość w opakowaniu" : "Количество в упаковке"

View File

@ -0,0 +1 @@
"Bambus", "szkło"

View File

@ -0,0 +1,121 @@
https://www.ikea.com/pl/pl/cat/haki-20617/
https://www.ikea.com/pl/pl/cat/wieszaki-20618/
https://www.ikea.com/pl/pl/cat/torby-i-wozki-na-zakupy-16295/
https://www.ikea.com/pl/pl/cat/plecaki-i-torby-na-ramie-27821/
https://www.ikea.com/pl/pl/cat/poduszki-podrozne-i-akcesoria-turystyczne-16255/
https://www.ikea.com/pl/pl/cat/organizery-do-toreb-47441/
https://www.ikea.com/pl/pl/cat/torby-chlodzace-46082/
https://www.ikea.com/pl/pl/cat/akcesoria-do-przeprowadzki-46078/
https://www.ikea.com/pl/pl/cat/kosze-lazienkowe-48940/
https://www.ikea.com/pl/pl/cat/polki-i-akcesoria-prysznicowe-10658/
https://www.ikea.com/pl/pl/cat/dozowniki-mydla-i-mydelniczki-10656/
https://www.ikea.com/pl/pl/cat/szczotki-toaletowe-48944/
https://www.ikea.com/pl/pl/cat/uchwyty-na-szczoteczki-do-zebow-48943/
https://www.ikea.com/pl/pl/cat/akcesoria-lazienkowe-bez-wiercenia-700699/
https://www.ikea.com/pl/pl/cat/pojemniki-na-zywnosc-20606/
https://www.ikea.com/pl/pl/cat/zestawy-pojemnikow-na-zywnosc-700586/
https://www.ikea.com/pl/pl/cat/akcesoria-do-przechowywania-zywnosci-i-zamykane-torby-700611/
https://www.ikea.com/pl/pl/cat/organizery-do-lodowek-700588/
https://www.ikea.com/pl/pl/cat/organizery-do-spizarni-i-na-blaty-robocze-700589/
https://www.ikea.com/pl/pl/cat/sloiki-puszki-i-chlebaki-15950/
https://www.ikea.com/pl/pl/cat/pojemniki-na-przyprawy-15951/
https://www.ikea.com/pl/pl/cat/kubki-termiczne-i-bidony-700352/
https://www.ikea.com/pl/pl/cat/torby-chlodzace-46082/
https://www.ikea.com/pl/pl/cat/stojaki-na-wino-i-butelki-15952/
https://www.ikea.com/pl/pl/cat/mix-match-pojemnikow-i-pokrywek-na-zywnosc-700610/
https://www.ikea.com/pl/pl/cat/biurka-do-domu-20651/
https://www.ikea.com/pl/pl/cat/produkty-akustyczne-mittzon-700539/
https://www.ikea.com/pl/pl/cat/przegrody-do-pokoju-i-biurka-mittzon-700540/
https://www.ikea.com/pl/pl/cat/stoly-konferencyjne-mittzon-700541/
https://www.ikea.com/pl/pl/cat/biurka-mittzon-700542/
https://www.ikea.com/pl/pl/cat/biurka-i-stoly-trotten-55993/
https://www.ikea.com/pl/pl/cat/kontenerki-i-przechowywanie-trotten-55992/
https://www.ikea.com/pl/pl/cat/blaty-i-podstawy-trotten-55991/
https://www.ikea.com/pl/pl/cat/akcesoria-trotten-55990/
https://www.ikea.com/pl/pl/cat/stoly-konferencyjne-trotten-700337/
https://www.ikea.com/pl/pl/cat/przegrody-na-biurko-700336/
https://www.ikea.com/pl/pl/cat/idasen-biurka-47426/
https://www.ikea.com/pl/pl/cat/idasen-szafki-i-komody-biurowe-47427/
https://www.ikea.com/pl/pl/cat/idasen-blaty-biurek-i-ramy-dolne-47425/
https://www.ikea.com/pl/pl/cat/przegrody-na-biurko-700336/
https://www.ikea.com/pl/pl/cat/kombinacje-biurek-stolow-18623/
https://www.ikea.com/pl/pl/cat/nogi-i-kozly-do-stolow-biurek-11845/
https://www.ikea.com/pl/pl/cat/blaty-do-stolow-biurek-11844/
https://www.ikea.com/pl/pl/cat/biurko-dzieciece-relatera-700562/
https://www.ikea.com/pl/pl/cat/blaty-i-podstawy-biurek-relatera-700563/
https://www.ikea.com/pl/pl/cat/akcesoria-do-relatera-700564/
https://www.ikea.com/pl/pl/cat/biurka-gamingowe-47070/
https://www.ikea.com/pl/pl/cat/biurka-do-pracy-na-stojaco-55008/
https://www.ikea.com/pl/pl/cat/biurka-do-biura-47069/
https://www.ikea.com/pl/pl/cat/biurka-dla-dzieci-24714/
https://www.ikea.com/pl/pl/cat/podstawki-i-stoliki-pod-laptopa-24830/
https://www.ikea.com/pl/pl/cat/przegrody-na-biurko-700336/
https://www.ikea.com/pl/pl/cat/krzesla-do-biurka-w-domu-20653/
https://www.ikea.com/pl/pl/cat/krzesla-biurowe-20654/
https://www.ikea.com/pl/pl/cat/krzesla-do-biurek-dla-dzieci-24715/
https://www.ikea.com/pl/pl/cat/biurka-gamingowe-47070/
https://www.ikea.com/pl/pl/cat/krzesla-i-fotele-gamingowe-47067/
https://www.ikea.com/pl/pl/cat/akcesoria-gamingowe-55397/
https://www.ikea.com/pl/pl/cat/zestawy-gamingowe-biurek-i-krzesel-56516/
https://www.ikea.com/pl/pl/cat/stoly-konferencyjne-bekant-54173/
https://www.ikea.com/pl/pl/cat/zestawy-stolow-konferencyjnych-i-krzesel-700424/
https://www.ikea.com/pl/pl/cat/zestawy-biurek-i-krzesel-53249/
https://www.ikea.com/pl/pl/cat/krzesla-konferencyjne-47068/
https://www.ikea.com/pl/pl/cat/zarowki-led-700412/
https://www.ikea.com/pl/pl/cat/ozdobne-zarowki-led-700413/
https://www.ikea.com/pl/pl/cat/inteligentne-zarowki-36813/
https://www.ikea.com/pl/pl/cat/lampy-wiszace-i-zyrandole-18751/
https://www.ikea.com/pl/pl/cat/lampy-sufitowe-18752/
https://www.ikea.com/pl/pl/cat/reflektory-sufitowe-18753/
https://www.ikea.com/pl/pl/cat/zyrandole-59307/
https://www.ikea.com/pl/pl/cat/lampy-stolowe-10732/
https://www.ikea.com/pl/pl/cat/lampy-podlogowe-10731/
https://www.ikea.com/pl/pl/cat/klosze-i-abazury-do-lamp-10804/
https://www.ikea.com/pl/pl/cat/podstawy-i-oprawki-do-lamp-10805/
https://www.ikea.com/pl/pl/cat/lampki-na-biurko-20502/
https://www.ikea.com/pl/pl/cat/reflektory-sufitowe-18753/
https://www.ikea.com/pl/pl/cat/reflektory-scienne-20505/
https://www.ikea.com/pl/pl/cat/oswietlenie-szynowe-25209/
https://www.ikea.com/pl/pl/cat/lampy-scienne-i-kinkiety-20504/
https://www.ikea.com/pl/pl/cat/reflektory-scienne-20505/
https://www.ikea.com/pl/pl/cat/oswietlenie-pokoju-dzieciecego-18773/
https://www.ikea.com/pl/pl/cat/lampy-led-20516/
https://www.ikea.com/pl/pl/cat/zarowki-led-700412/
https://www.ikea.com/pl/pl/cat/ozdobne-zarowki-led-700413/
https://www.ikea.com/pl/pl/cat/inteligentne-zarowki-36813/
https://www.ikea.com/pl/pl/cat/paski-led-57542/
https://www.ikea.com/pl/pl/cat/inteligentne-lampy-59308/
https://www.ikea.com/pl/pl/cat/lampy-przenosne-700512/
https://www.ikea.com/pl/pl/cat/inteligentne-zarowki-36813/
https://www.ikea.com/pl/pl/cat/inteligentne-oswietlenie-zintegrowane-42248/
https://www.ikea.com/pl/pl/cat/urzadzenia-sterujace-i-akcesoria-36814/
https://www.ikea.com/pl/pl/cat/zestawy-inteligentnego-oswietlenia-36815/
https://www.ikea.com/pl/pl/cat/inteligentne-lampy-59308/
https://www.ikea.com/pl/pl/cat/panele-led-sufitowe-36816/
https://www.ikea.com/pl/pl/cat/oswietlenie-szafek-lazienkowych-55010/
https://www.ikea.com/pl/pl/cat/oswietlenie-mebli-kuchennych-16282/
https://www.ikea.com/pl/pl/cat/oswietlenie-regalow-16281/
https://www.ikea.com/pl/pl/cat/oswietlenie-szaf-16283/
https://www.ikea.com/pl/pl/cat/oswietlenie-szafek-lazienkowych-55010/
https://www.ikea.com/pl/pl/cat/oswietlenie-sufitowe-do-lazienki-700215/
https://www.ikea.com/pl/pl/cat/oswietlenie-scienne-do-lazienki-700214/
https://www.ikea.com/pl/pl/cat/lustra-z-oswietleniem-49138/
https://www.ikea.com/pl/pl/cat/zewnetrzne-lampy-podlogowe-700615/
https://www.ikea.com/pl/pl/cat/zewnetrzne-lampy-scienne-700616/
https://www.ikea.com/pl/pl/cat/lampy-stolowe-zewnetrzne-700617/
https://www.ikea.com/pl/pl/cat/lampy-wiszace-zewnetrzne-700618/
https://www.ikea.com/pl/pl/cat/zewnetrzne-lancuchy-swietlne-700619/
https://www.ikea.com/pl/pl/cat/oswietlenie-sciezek-700620/
https://www.ikea.com/pl/pl/cat/lampiony-i-latarenki-do-zewnatrz-54942/
https://www.ikea.com/pl/pl/cat/dekoracje-swietlne-stolu-700179/
https://www.ikea.com/pl/pl/cat/dekoracyjne-lampy-wiszace-700177/
https://www.ikea.com/pl/pl/cat/swiatla-lancuchowe-700180/
https://www.ikea.com/pl/pl/cat/swiece-led-39266/
https://www.ikea.com/pl/pl/cat/oswietlenie-dekoracyjne-led-54943/
https://www.ikea.com/pl/pl/cat/paski-led-57542/
https://www.ikea.com/pl/pl/cat/sofy-tapicerowane-2-osobowe-10668/
https://www.ikea.com/pl/pl/cat/sofy-tapicerowane-3-osobowe-10670/
https://www.ikea.com/pl/pl/cat/sofy-materialowe-z-szezlongami-47388/
https://www.ikea.com/pl/pl/cat/narozniki-tapicerowane-10671/
https://www.ikea.com/pl/pl/cat/sekcje-sofy-modulowej-31786/

View File

@ -1 +1,5 @@
https://www.ikea.com/pl/pl/cat/poduszki-ergonomiczne-46083/
https://www.ikea.com/pl/pl/cat/akcesoria-do-przechowywania-zywnosci-i-zamykane-torby-700611/
https://www.ikea.com/pl/pl/cat/organizery-do-lodowek-700588/
https://www.ikea.com/pl/pl/cat/organizery-do-spizarni-i-na-blaty-robocze-700589/
https://www.ikea.com/pl/pl/cat/sloiki-puszki-i-chlebaki-15950/

View File

@ -1,57 +1 @@
https://www.ikea.com/pl/pl/p/majgull-zaslony-zacieniajace-1-para-bezowozolty-na-tasmie-70586026/
https://www.ikea.com/pl/pl/cat/zestawy-pojemnikow-na-zywnosc-700586
https://www.ikea.com/pl/pl/p/majgull-zaslony-zaciemniajace-para-szary-na-tasmie-80417815/
https://www.ikea.com/pl/pl/p/majgull-zaslony-zaciemniajace-para-szary-na-tasmie-50417812/
https://www.ikea.com/pl/pl/p/majgull-zaslony-zaciemniajace-para-ciemnozielony-na-tasmie-30586033/
https://www.ikea.com/pl/pl/p/maesterrot-zaslona-2-szt-bialy-bialy-kratka-na-tasmie-20602496/
https://www.ikea.com/pl/pl/p/maesterrot-zaslona-2-szt-bezowy-bialy-wzor-w-kropki-na-tasmie-00602567/
https://www.ikea.com/pl/pl/p/loennstaevmal-zaslony-zaciemniajace-para-jasny-czerwono-brazowy-na-tasmie-50556370/
https://www.ikea.com/pl/pl/p/loennstaevmal-zaslony-zaciemniajace-para-jasnooliwkowy-na-tasmie-80556335/
https://www.ikea.com/pl/pl/p/loennstaevmal-zaslony-zaciemniajace-para-bezowy-na-tasmie-70556374/
https://www.ikea.com/pl/pl/p/lillyana-firanki-2-szt-bialy-kwiat-na-tunelu-30386524/
https://www.ikea.com/pl/pl/p/lill-firanki-1-para-bialy-na-tunelu-10070262/
https://www.ikea.com/pl/pl/p/lenda-zaslona-z-wiazaniem-2-szt-kremowy-na-tasmie-50552881/
https://www.ikea.com/pl/pl/p/lenda-zaslona-z-wiazaniem-2-szt-jasny-szarozielony-na-tasmie-90559197/
https://www.ikea.com/pl/pl/p/lenda-zaslona-z-wiazaniem-2-szt-ciemnoszary-na-tasmie-60552871/
https://www.ikea.com/pl/pl/p/lenda-zaslona-z-wiazaniem-2-szt-brazowoczerwony-na-tasmie-30559195/
https://www.ikea.com/pl/pl/p/korgmott-zaslony-zaciemniajace-para-ciemnoszary-na-tasmie-30597159/
https://www.ikea.com/pl/pl/p/korgmott-zaslony-zaciemniajace-para-bialy-na-tasmie-40597149/
https://www.ikea.com/pl/pl/p/korgmott-zaslony-zaciemniajace-para-bezowy-na-tasmie-50597158/
https://www.ikea.com/pl/pl/p/hilja-zaslona-2-szt-szary-na-tasmie-90390735/
https://www.ikea.com/pl/pl/p/hilja-zaslona-2-szt-bialy-na-tasmie-50430818/
https://www.ikea.com/pl/pl/p/hilja-zaslona-2-szt-bialy-na-tasmie-40430814/
https://www.ikea.com/pl/pl/p/hildrun-firanki-2-szt-bialy-w-kropki-na-tunelu-00386549/
https://www.ikea.com/pl/pl/p/haellebraecka-firanki-2-szt-jasnobezowy-na-tasmie-00556848/
https://www.ikea.com/pl/pl/p/haellebraecka-firanki-2-szt-bialy-na-tasmie-70559674/
https://www.ikea.com/pl/pl/p/haeggveckmal-zaslony-zacieniajace-1-para-ciemnozielony-na-tasmie-00569110/
https://www.ikea.com/pl/pl/p/haeggveckmal-zaslony-zacieniajace-1-para-ciemnoszary-na-tasmie-00562123/
https://www.ikea.com/pl/pl/p/haeggveckmal-zaslony-zacieniajace-1-para-bezowy-na-tasmie-20569029/
https://www.ikea.com/pl/pl/p/glesgroee-firanki-2-szt-szary-na-tasmie-30548989/
https://www.ikea.com/pl/pl/p/gjertrud-firanki-2-szt-bialy-na-tasmie-30386538/
https://www.ikea.com/pl/pl/p/gjertrud-firanka-1-szt-bialy-na-tasmie-60558948/
https://www.ikea.com/pl/pl/p/ginstmott-zaslona-2-szt-zolty-na-tasmie-60597186/
https://www.ikea.com/pl/pl/p/ginstmott-zaslona-2-szt-rozowy-na-tasmie-50597182/
https://www.ikea.com/pl/pl/p/ginstmott-zaslona-2-szt-granatowy-na-tasmie-80597190/
https://www.ikea.com/pl/pl/p/ginstmott-zaslona-2-szt-bialy-z-oczkami-30605418/
https://www.ikea.com/pl/pl/p/ginstmott-zaslona-2-szt-bialy-na-tasmie-60597167/
https://www.ikea.com/pl/pl/p/ginstmott-zaslona-2-szt-bezowy-z-oczkami-70605416/
https://www.ikea.com/pl/pl/p/ginstmott-zaslona-2-szt-bezowy-na-tasmie-40597173/
https://www.ikea.com/pl/pl/p/foensterbomal-magnetyczna-opaska-do-zaslon-bezowy-70498553/
https://www.ikea.com/pl/pl/p/fjaedermott-zaslona-2-szt-bialy-szary-na-tasmie-70504587/
https://www.ikea.com/pl/pl/p/dytag-zaslona-2-szt-szarozielony-na-tasmie-80552889/
https://www.ikea.com/pl/pl/p/dytag-zaslona-2-szt-len-na-tasmie-80607820/
https://www.ikea.com/pl/pl/p/dytag-zaslona-2-szt-granatowy-na-tasmie-90552493/
https://www.ikea.com/pl/pl/p/dytag-zaslona-2-szt-bialy-na-tasmie-20466719/
https://www.ikea.com/pl/pl/p/bymott-zaslona-2-szt-bialy-jasnoszary-w-paski-na-tasmie-30466686/
https://www.ikea.com/pl/pl/p/bymott-zaslona-2-szt-bialy-bezowy-w-paski-na-tasmie-80509971/
https://www.ikea.com/pl/pl/p/bruksvara-zaslona-zacieniajaca-1-szt-szary-na-tasmie-10574248/
https://www.ikea.com/pl/pl/p/bruksvara-zaslona-zacieniajaca-1-szt-jasnoniebieski-na-tasmie-50574171/
https://www.ikea.com/pl/pl/p/bergklematis-magnetyczna-opaska-do-zaslon-szary-90498552/
https://www.ikea.com/pl/pl/p/bengta-zaslona-zaciemniajaca-1-szt-zielony-na-tasmie-10602166/
https://www.ikea.com/pl/pl/p/bengta-zaslona-zaciemniajaca-1-szt-bezowy-na-tasmie-30602090/
https://www.ikea.com/pl/pl/p/annakajsa-zaslony-zacieniajace-1-para-jasnoszary-na-tasmie-60583429/
https://www.ikea.com/pl/pl/p/annakajsa-zaslony-zacieniajace-1-para-ciemnozielony-na-tasmie-70586074/
https://www.ikea.com/pl/pl/p/annakajsa-zaslony-zacieniajace-1-para-bezowy-na-tasmie-30462792/
https://www.ikea.com/pl/pl/p/annakajsa-zaslony-zacieniajace-1-para-antracyt-na-tasmie-40583430/
https://www.ikea.com/pl/pl/p/alvine-spets-firanki-1-para-kremowy-na-tunelu-80070763/
https://www.ikea.com/pl/pl/p/alvine-spets-firanka-1-szt-bialy-na-tunelu-50559811/
https://www.ikea.com/pl/pl/p/aengsfryle-firanka-1-szt-bialy-na-tasmie-70569220/

File diff suppressed because one or more lines are too long

View File

@ -1,15 +1,38 @@
-import requests
-import json
-import os
-import html
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os, json, re, math, time, html, requests, datetime
from bs4 import BeautifulSoup
from openpyxl import Workbook
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-INPUT_FILE = os.path.join(BASE_DIR, "product_links.txt")
-OUTPUT_FILE = os.path.join(BASE_DIR, "result.xlsx")
# ───────────────────────── PATHS / FILES ───────────────────────────
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
RECORDS_DIR = os.path.join(BASE_DIR, "records_folder")
os.makedirs(RECORDS_DIR, exist_ok=True)
INPUT_FILE = os.path.join(BASE_DIR, "product_links.txt")
OUTPUT_FILE = os.path.join(RECORDS_DIR, "records.xlsx")
DICT_FILE = os.path.join(BASE_DIR, "dictionary_main.txt")
EXCL_FILE = os.path.join(BASE_DIR, "exclusion_materials.txt")
POST_LOG = os.path.join(RECORDS_DIR, "post_log.txt")
# ───────────────────────── POST SETTINGS ─────────────────────────
'''
At startup the script asks:
- whether to save the JSON batches to disk
- whether to send the batches to the API
Answer: 1 (yes) / 0 (no). Empty input = 1.
'''
POST_URL = os.getenv("IKEA_POST_URL", "http://localhost:3005/parser/data")
POST_API_KEY = os.getenv("IKEA_POST_API_KEY", "")
POST_TIMEOUT = 20
BATCH_SIZE = 50
# ───────────────────────── SITE SETTINGS ────────────────────────
HEADERS = {"User-Agent": "Mozilla/5.0"}
CSS_SELECTOR = ".pip-product__subgrid.product-pip.js-product-pip"
BLOCKS = [
"buyModule",
"productSummary",
@ -18,9 +41,12 @@ BLOCKS = [
"keyFacts", "keyFacts",
"stockcheckSection", "stockcheckSection",
"availabilityGroup", "availabilityGroup",
"productGallery" "productGallery",
] ]
# ── какие колонки сохраняем ─────────────────────────────────────────
'''
Whitelist колонок для Excel.
'''
KEEP_COLUMNS = [ KEEP_COLUMNS = [
"availabilityGroup.serverOnlineSellable", "availabilityGroup.serverOnlineSellable",
"availabilityGroup.storeHeader", "availabilityGroup.storeHeader",
@ -31,41 +57,140 @@ KEEP_COLUMNS = [
"keyFacts.ariaLabels", "keyFacts.ariaLabels",
"keyFacts.gaLabel", "keyFacts.gaLabel",
"keyFacts.keyFacts", "keyFacts.keyFacts",
"keyFacts.keyFacts_formatted",
"pipPricePackage.measurementText", "pipPricePackage.measurementText",
"pipPricePackage.productDescription", "pipPricePackage.productDescription",
"productGallery.urls", "productGallery.urls",
"productInformationSection.dimensionProps", "productInformationSection.dimensionProps",
"productInformationSection.dimensionProps_formatted",
"productInformationSection.dimensionProps_formatted_html_translated",
"productInformationSection.productDetailsProps", "productInformationSection.productDetailsProps",
"productInformationSection.productDetailsProps_formatted",
"productInformationSection.productDetailsProps_formatted_html",
"productSummary.description", "productSummary.description",
"productSummary.visibleItemNo", "productSummary.visibleItemNo",
"stockcheckSection.packagingProps", "stockcheckSection.packagingProps",
"stockcheckSection.typeName", "stockcheckSection.typeName",
"url", "total brutto",
"prductVariantColorMeasure",
"categoryBreadcrumb", "categoryBreadcrumb",
"originalName", # ### NEW: колонка для Excel
"url",
] ]
# ───────────────────────── I/O UTILITIES ────────────────────────────
def ask_bool(prompt: str, default: str = "1") -> bool:
'''
Asks for 1/0; empty input means the default.
'''
try:
val = input(f"{prompt} (1=yes, 0=no) [{default}]: ").strip() or default
except EOFError:
val = default
return val == "1"
def _post_log(msg: str):
'''Append a line to post_log.txt (silently ignore errors).'''
try:
with open(POST_LOG, "a", encoding="utf-8") as f:
f.write(msg.rstrip() + "\n")
except Exception:
pass
def _now_tag():
return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
def _save_json_batch(payload: dict, batch_index: int):
fname = f"ikea_batch_{_now_tag()}_{batch_index:04d}.json"
fpath = os.path.join(RECORDS_DIR, fname)
with open(fpath, "w", encoding="utf-8") as fh:
json.dump(payload, fh, ensure_ascii=False, indent=2)
print(f"💾 JSON saved: {fname}")
return fpath
# ───────────────────────── DICTIONARIES / FILTERS ──────────────────────
def load_dictionary(path: str) -> dict:
'''
Reads the translation dictionary, e.g.:
"Wymiary" : "Размеры",
...
'''
if not os.path.isfile(path):
return {}
txt = open(path, "r", encoding="utf-8").read()
pairs = re.findall(r'"([^"]+)"\s*:\s*"([^"]+)"', txt)
return {k: v for k, v in pairs}
DICT = load_dictionary(DICT_FILE)
def translate_token(token: str) -> str:
return DICT.get(token, token)
def load_exclusions(path: str) -> set:
'''
Loads the exclusion tokens from exclusion_materials.txt:
- one token per line
- or comma-separated
- case is ignored
'''
if not os.path.isfile(path):
return set()
txt = open(path, "r", encoding="utf-8").read()
# quoted tokens first, if present:
quoted = re.findall(r'"([^"]+)"', txt, flags=re.S)
tokens = quoted if quoted else re.split(r"[,;\n\r]+", txt)
return {t.strip().lower() for t in tokens if t.strip()}
EXCLUSIONS = load_exclusions(EXCL_FILE)
def materials_from_details_json(details: dict) -> list[str]:
'''
Extract ALL strings stored under "material" keys at any depth of productDetailsProps.
Different schemas occur, so we walk the structure recursively.
'''
out = []
def walk(node):
if isinstance(node, dict):
for k, v in node.items():
if k == "material" and isinstance(v, str):
out.append(v)
else:
walk(v)
elif isinstance(node, list):
for x in node:
walk(x)
walk(details or {})
return out
def materials_match_exclusions(details: dict, exclusion_tokens: set) -> bool:
'''
True if at least one token occurs in any material string (case-insensitive).
'''
if not exclusion_tokens:
return False
mats = materials_from_details_json(details)
joined = "\n".join(mats).lower()
return any(tok in joined for tok in exclusion_tokens)
# ───────────────────────── FORMATTERS ─────────────────────────────
def _parse_json_value(val):
if isinstance(val, (dict, list)) or val is None:
return val
if isinstance(val, str):
s = val.strip()
if not s:
return val
try:
return json.loads(s)
except Exception:
return val
return val
def flatten_block(block_name, data):
if not isinstance(data, dict):
return {}
flat = {}
for k, v in data.items():
'''
# === 1. dimensionProps.images ===
if block_name == "productInformationSection" and k == "dimensionProps":
if isinstance(v, dict):
urls = []
for img in v.get("images", []):
if isinstance(img, dict):
url = img.get("url")
if url:
urls.append(url)
flat[f"{key_name}.images_urls"] = "\n".join(urls)
continue
'''
# === 2. mediaList.content.url → productGallery.urls
if block_name == "productGallery" and k == "mediaList":
if isinstance(v, list):
urls = []
@ -74,33 +199,242 @@ def flatten_block(block_name, data):
if isinstance(content, dict) and "url" in content:
urls.append(content["url"])
flat["productGallery.urls"] = "\n".join(urls)
-return flat # ⬅ return only the urls, ignore the remaining fields
return flat
continue
# === Remaining fields — by default ===
key = f"{block_name}.{k}"
flat[key] = v
return flat
def format_keyfacts(raw_keyfacts):
if not isinstance(raw_keyfacts, list):
return ""
out = []
header_added = False
for el in raw_keyfacts:
lbl = (el or {}).get("label")
name = (el or {}).get("name", "Właściwości")
if not header_added:
out.append(name)
header_added = True
if lbl:
out.append(lbl)
return "\n".join(out)
-def extract_data(url):
-"""
-Returns a dict with the required IKEA product fields.
-+ NEW: adds a 'categoryBreadcrumb' key of the form
-'Produkty/Tekstylia/Tekstylia do sypialni/Narzuty na łóżko'
-(taken from the JSON-LD BreadcrumbList).
-"""
-try:
-response = requests.get(url, timeout=10,
-headers={"User-Agent": "Mozilla/5.0"})
-response.raise_for_status()
-soup = BeautifulSoup(response.text, "html.parser")
def _fmt_float(x):
try:
return f"{float(x):.2f}".rstrip("0").rstrip(".")
except Exception:
return ""
def _collect_packaging_total_kg(packaging):
total = 0.0
if not isinstance(packaging, dict):
return total
content = (packaging.get("contentProps") or {}).get("packages") or []
for pkg in content:
qty = ((pkg.get("quantity") or {}).get("value")) or 1
ms = pkg.get("measurements") or []
for block in ms:
if not isinstance(block, list):
continue
weight_lbl = next((m for m in block if (m.get("type") == "weight" or m.get("label") == "Waga")), None)
if weight_lbl and isinstance(weight_lbl.get("value"), (int, float)):
total += float(weight_lbl["value"]) * (qty or 1)
return total
def format_dimensions(raw_dim_props, with_html=False, translated=False):
if not isinstance(raw_dim_props, dict):
return ""
lines = []
br = "<br/>" if with_html else "\n"
title = translate_token("Wymiary") if translated else "Wymiary"
lines.append(f"<strong>{title}</strong>" if with_html else title)
for d in raw_dim_props.get("dimensions", []):
name = d.get("name", "")
meas = d.get("measure", "")
if not name and not meas:
continue
if translated:
name_t = translate_token(name)
line = f"{name_t}: {meas}".strip()
else:
line = f"{name}: {meas}".strip()
lines.append(line)
pack = (raw_dim_props.get("packaging") or {})
pack_title = translate_token("Opakowanie") if translated else "Opakowanie"
lines.append(br if with_html else "")
lines.append(f"<strong>{pack_title}</strong>" if with_html else pack_title)
content = (pack.get("contentProps") or {}).get("packages") or []
for pkg in content:
name = pkg.get("name") or ""
if name:
lines.append(name)
art = (pkg.get("articleNumber") or {}).get("value")
if art:
art_lbl = "Numer artykułu"
if translated:
art_lbl = translate_token(art_lbl)
lines.append(art_lbl)
lines.append(f"{art}")
ms = pkg.get("measurements") or []
for block in ms:
if not isinstance(block, list):
continue
for m in block:
lbl = m.get("label", "")
txt = m.get("text", "")
if translated:
lbl = translate_token(lbl) if lbl else lbl
if lbl or txt:
lines.append(f"{lbl}: {txt}".strip(": "))
q_val = ((pkg.get("quantity") or {}).get("value"))
if q_val:
q_lbl = "Paczka(i)"
if translated:
q_lbl = translate_token(q_lbl)
lines.append(f"{q_lbl}: {q_val}")
if with_html:
s = br.join([x for x in lines if x is not None])
s = re.sub(r"(" + re.escape(br) + r"){2,}", br*2, s)
s = s.strip(br)
# ### NEW: safeguard; sometimes the leading "<" gets lost in the Excel preview
if s.startswith("strong>"):
s = "<" + s
return s
return "\n".join([x for x in lines if x is not None]).strip()
def format_product_details(raw_details, add_summary_desc="", with_html=False, skip_assembly=True):
if not isinstance(raw_details, dict):
return add_summary_desc
br = "<br/>" if with_html else "\n"
out = []
if add_summary_desc:
out.append(add_summary_desc)
out.append(br if with_html else "")
t1 = "Informacje o produkcie"
out.append(f"<strong>{t1}</strong>" if with_html else t1)
pd = (raw_details.get("productDescriptionProps") or {})
paragraphs = pd.get("paragraphs") or []
for p in paragraphs:
out.append(p)
dlabel = pd.get("designerLabel")
dname = pd.get("designerName")
if dlabel and dname:
out.append(dlabel)
out.append(dname)
if raw_details.get("productId"):
out.append("Numer artykułu")
out.append(raw_details["productId"])
acc = (raw_details.get("accordionObject") or {})
gk = ((acc.get("goodToKnow") or {}).get("contentProps") or {}).get("goodToKnow") or []
if gk:
out.append(br if with_html else "")
t2 = "Dobrze wiedzieć"
out.append(f"<strong>{t2}</strong>" if with_html else t2)
for item in gk:
txt = item.get("text")
if txt:
out.append(txt)
mac = (acc.get("materialsAndCare") or {}).get("contentProps") or {}
mats = mac.get("materials") or []
care = mac.get("careInstructions") or []
t3 = "Materiały i pielęgnacja"
if mats or care:
out.append(br if with_html else "")
out.append(f"<strong>{t3}</strong>" if with_html else t3)
if mats:
out.append("Materiały")
for m in mats:
ptype = m.get("productType", "")
for mat in (m.get("materials") or []):
material = mat.get("material", "")
if ptype:
out.append(ptype)
if material:
out.append(material)
if care:
detailsCareText = mac.get("detailsCareText", "Pielęgnacja")
out.append(detailsCareText)
for c in care:
ptype = c.get("productType", "")
texts = c.get("texts") or []
if ptype:
out.append(ptype)
for t in texts:
out.append(t)
safety = (raw_details.get("safetyAndCompliance") or {}).get("contentProps") or {}
sc = safety.get("safetyAndCompliance") or []
if sc:
out.append(br if with_html else "")
t4 = "Bezpieczeństwo i zgodność z przepisami"
out.append(f"<strong>{t4}</strong>" if with_html else t4)
for s in sc:
txt = s.get("text")
if txt:
out.append(txt)
'''
### The "Instrukcja montażu" assembly block used to live here; disabled on request.
if not skip_assembly:
...
'''
if with_html:
s = br.join([x for x in out if x is not None])
s = re.sub(r"(" + re.escape(br) + r"){2,}", br*2, s)
return s.strip(br)
return "\n".join([x for x in out if x is not None]).strip()
def build_variant_color_measure(desc: str, type_name: str, measurement: str) -> str:
s = (desc or "")
t = (type_name or "").strip()
if t:
pattern = r"^\s*" + re.escape(t) + r"[\s,;:\-–—/]*"
s = re.sub(pattern, "", s, flags=re.IGNORECASE)
if not re.search(r"[0-9A-Za-zА-Яа-яЁёÀ-ž]", s or ""):
s = ""
s = s.strip()
meas = (measurement or "").strip()
if not s:
return meas if meas else ""
s = s[:1].upper() + s[1:]
return f"{s}, {meas}" if meas else s
# ───────────────────── PRODUCT PAGE SCRAPING ──────────────────────
def extract_data(url: str) -> dict:
'''
Returns a flat dict with the KEEP_COLUMNS fields.
Formatters/derived values: keyFacts_formatted, dimensionProps_formatted,
dimensionProps_formatted_html_translated, productDetailsProps_formatted,
productDetailsProps_formatted_html, total brutto, prductVariantColorMeasure, categoryBreadcrumb.
'''
try:
resp = requests.get(url, headers=HEADERS, timeout=15)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
# ── main JSON from data-hydration-props ────────────────────
target = soup.select_one(CSS_SELECTOR)
if not target:
return {"url": url, "error": "CSS selector not found"}
@ -109,92 +443,271 @@ def extract_data(url):
if not raw:
return {"url": url, "error": "data-hydration-props not found"}
decoded = html.unescape(raw)
full_json = json.loads(decoded)
result = {"url": url}
# вытаскиваем нужные блоки result = {"url": url}
for block in BLOCKS: for block in BLOCKS:
result.update(flatten_block(block, full_json.get(block, {}))) result.update(flatten_block(block, full_json.get(block, {})))
# ── NEW: извлекаем BreadcrumbList → categoryBreadcrumb ──── kf_json = _parse_json_value(result.get("keyFacts.keyFacts"))
dim_json = _parse_json_value(result.get("productInformationSection.dimensionProps"))
det_json = _parse_json_value(result.get("productInformationSection.productDetailsProps"))
result["keyFacts.keyFacts_formatted"] = format_keyfacts(kf_json)
result["productInformationSection.dimensionProps_formatted"] = format_dimensions(dim_json, with_html=False, translated=False)
html_trans = format_dimensions(dim_json, with_html=True, translated=True)
# ### NEW: extra safeguard in case the leading '<' is missing:
if isinstance(html_trans, str) and html_trans.startswith("strong>"):
html_trans = "<" + html_trans
result["productInformationSection.dimensionProps_formatted_html_translated"] = html_trans
total_kg = _collect_packaging_total_kg((dim_json or {}).get("packaging") or {})
result["total brutto"] = _fmt_float(total_kg)
summary_desc = result.get("productSummary.description", "") or ""
result["productInformationSection.productDetailsProps_formatted"] = format_product_details(det_json, add_summary_desc=summary_desc, with_html=False, skip_assembly=True)
result["productInformationSection.productDetailsProps_formatted_html"] = format_product_details(det_json, add_summary_desc=summary_desc, with_html=True, skip_assembly=True)
desc = result.get("pipPricePackage.productDescription", "") or ""
tname = result.get("stockcheckSection.typeName", "") or ""
meas = result.get("pipPricePackage.measurementText", "") or ""
result["prductVariantColorMeasure"] = build_variant_color_measure(desc, tname, meas)
# breadcrumb
breadcrumb = None
for tag in soup.find_all("script", attrs={"type": lambda t: t and "ld+json" in t}):
try:
data = json.loads(tag.string)
except Exception:
continue
if isinstance(data, list):
data = next((d for d in data if isinstance(d, dict) and d.get("@type") == "BreadcrumbList"), None)
if isinstance(data, dict) and data.get("@type") == "BreadcrumbList":
items = data.get("itemListElement", [])
names = [it.get("name", "") for it in items]
breadcrumb = "/".join(names)
break
if breadcrumb:
result["categoryBreadcrumb"] = breadcrumb
# apply the whitelist (KEEP_COLUMNS)
filtered = {k: result.get(k) for k in KEEP_COLUMNS if k != "originalName"}
'''
### NEW: originalName = productName + " " + typeName (no double spaces)
'''
pn = (result.get("buyModule.productName") or "").strip()
tn = (result.get("stockcheckSection.typeName") or "").strip()
if pn and tn:
orig_name = f"{pn} {tn}"
else:
orig_name = pn or tn
filtered["originalName"] = orig_name
return filtered
except Exception as e:
return {"url": url, "error": str(e)}
# ───────────────────── VARIANT BUILD / POST ───────────────────────
def _split_color_size(text: str):
if not text:
return "", ""
parts = [p.strip() for p in text.split(",", 1)]
if len(parts) == 2:
return parts[0], parts[1]
return "", parts[0]
def _ceil_price(v):
try:
return int(math.ceil(float(v)))
except Exception:
return None
def _ceil_int(v):
try:
return int(math.ceil(float(v)))
except Exception:
return None
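# Both helpers round up to a whole number: _ceil_price("129.01") -> 130,
# _ceil_int(2.2) -> 3; non-numeric input returns None.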
def build_variant(row: dict) -> dict:
category_name = row.get("categoryBreadcrumb") or ""
brand_name = "ikea"
visible = row.get("productSummary.visibleItemNo") or ""
sku = visible.replace(" ", "")
csm = (row.get("prductVariantColorMeasure") or "").strip()
color, size = _split_color_size(csm)
if not color and not size:
size = (row.get("pipPricePackage.measurementText") or "").strip()
cost = _ceil_price(row.get("buyModule.productPrice"))
url = row.get("url") or ""
'''
### NEW: originalName is taken from the column of the same name (not only from productName)
'''
name = row.get("originalName") or row.get("buyModule.productName") or ""
desc_html = row.get("productInformationSection.productDetailsProps_formatted_html") or ""
'''
### NEW: originalComposition = HTML from dimensionProps_formatted_html_translated
'''
composition_html = row.get("productInformationSection.dimensionProps_formatted_html_translated") or ""
imgs = []
raw_imgs = row.get("productGallery.urls") or ""
if isinstance(raw_imgs, str):
imgs = [x for x in raw_imgs.split("\n") if x.strip()]
in_stock = bool(row.get("availabilityGroup.serverOnlineSellable"))
if not in_stock:
in_stock = bool(row.get("buyModule.onlineSellable"))
weight_kg = _ceil_int(row.get("total brutto"))
variant = {
"status_id": 1,
"color": color.capitalize() if color else "none",
"sku": sku,
"size": size,
"cost": cost,
"originalUrl": url,
"originalName": name, # ← ### NEW: в JSON сохраняем originalName
"originalDescription": desc_html,
"originalComposition": composition_html, # ← ### NEW
"images": imgs,
"inStock": in_stock,
"weight": weight_kg if weight_kg is not None else 0,
}
return {
#"category": {"name": category_name},
"category": {"name": "TEST/IKEA"},
"brand": {"name": "ikea"},
"variant": variant,
}
def post_payload(payload: dict) -> dict:
headers = {"Content-Type": "application/json"}
if POST_API_KEY:
headers["Authorization"] = f"Bearer {POST_API_KEY}"
body = json.dumps(payload, ensure_ascii=False)
_post_log(f"→ POST {POST_URL}\nHeaders: {headers}\nBody: {body}")
try:
r = requests.post(POST_URL, headers=headers, data=body.encode("utf-8"), timeout=POST_TIMEOUT)
text = r.text
_post_log(f"{r.status_code}\n{text}\n{'-'*60}")
ok = 200 <= r.status_code < 300
return {"ok": ok, "status": r.status_code, "response": text}
except Exception as e:
_post_log(f"× ERROR: {e}\n{'-'*60}")
return {"ok": False, "status": None, "error": str(e)}
# ───────────────────────── SCRIPT CORE ────────────────────────────
def safe_cell(val):
if isinstance(val, (dict, list)):
return json.dumps(val, ensure_ascii=False)
return "" if val is None else val
def main():
SAVE_JSON = ask_bool("SAVE_JSON (save JSON to disk?)", "1")
SEND_JSON = ask_bool("SEND_JSON (send to the API?)", "1")
# read the links
with open(INPUT_FILE, "r", encoding="utf-8") as f:
links = [line.strip() for line in f if line.strip()]
print(f"Total links: {len(links)}")
# prepare the Excel workbook
wb = Workbook()
ws = wb.active
ws.title = "IKEA Products"
ws.append(KEEP_COLUMNS)
# batch for JSON/API
batch_items = []
batch_index = 1
def flush_batch():
nonlocal batch_items, batch_index
if not batch_items:
return
payload = {"parserName": "ikea", "items": batch_items}
if SAVE_JSON:
_save_json_batch(payload, batch_index)
if SEND_JSON:
res = post_payload(payload)
ok = res.get("ok")
print(f"POST batch {batch_index}: {'OK' if ok else 'FAIL'} (status={res.get('status')})")
batch_index += 1
batch_items = []
print("🔍 Извлечение данных...")
for idx, link in enumerate(links, 1):
print(f"[{idx}/{len(links)}] {link}")
row = extract_data(link)
'''
### NEW: originalName is already built in extract_data and present in row
'''
# write EVERYTHING to Excel (no filters)
ws.append([safe_cell(row.get(col, "")) for col in KEEP_COLUMNS])
# FILTERS for JSON/API
try:
price = float(row.get("buyModule.productPrice") or 0)
except Exception:
price = 0.0
try:
total_kg = float(row.get("total brutto") or 0)
except Exception:
total_kg = 0.0
details_json = row.get("productInformationSection.productDetailsProps") or {}
# 1) price filter
if not (20 <= price <= 1500):
pass
# 2) weight filter
elif total_kg > 30:
pass
pass
# 3) materials filter
elif materials_match_exclusions(details_json, EXCLUSIONS):
pass
else:
# passed the filters, add to the batch
try:
item = build_variant(row)
batch_items.append(item)
except Exception as e:
_post_log(f"× build_variant error for {link}: {e}")
# autosave Excel every 50 rows
if idx % 50 == 0:
wb.save(OUTPUT_FILE)
print(f"💾 autosave: {OUTPUT_FILE}")
# flush the batch when the limit is reached
if len(batch_items) >= BATCH_SIZE:
flush_batch()
# final step: write the Excel file and send/save the remaining batch
wb.save(OUTPUT_FILE)
print(f"\n✅ Excel ready: {OUTPUT_FILE}")
flush_batch()
print("🎯 Готово.")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -1,8 +1,21 @@
https://www.ikea.com/pl/pl/p/oevermaett-oslona-zywnosci-kpl-3-szt-silikon-wielobarwny-80417311/
https://www.ikea.com/pl/pl/p/bevara-klips-do-torebek-antracyt-ciemnozolty-90524179/
https://www.ikea.com/pl/pl/p/istad-torebka-strunowa-wzor-czarny-zolty-50525642/
https://www.ikea.com/pl/pl/p/koessebaer-torebka-strunowa-brazowy-70599260/
https://www.ikea.com/pl/pl/p/istad-torebka-strunowa-wzor-zielony-40525685/
https://www.ikea.com/pl/pl/p/koessebaer-kosz-na-warzywa-i-owoce-topola-60599270/
https://www.ikea.com/pl/pl/p/istad-torebka-strunowa-wzor-czerwony-rozowy-80525674/
https://www.ikea.com/pl/pl/p/koessebaer-torebka-strunowa-rozne-wzory-50599261/
https://www.ikea.com/pl/pl/p/framtung-torba-na-lunch-czarny-40498922/
https://www.ikea.com/pl/pl/p/ikea-365-etykieta-20438547/
https://www.ikea.com/pl/pl/p/bevara-klips-do-torebek-zestaw-26-szt-rozne-kolory-00524174/
https://www.ikea.com/pl/pl/p/fladdrig-torba-na-lunch-wzor-szary-10497212/
https://www.ikea.com/pl/pl/p/istad-torebka-strunowa-wzor-niebieski-00525654/
https://www.ikea.com/pl/pl/p/kustfyr-torebka-strunowa-wzor-w-koty-szary-90599607/
https://www.ikea.com/pl/pl/p/oevermaett-oslona-na-zywnosc-zest-2-szt-silikon-30497923/
https://www.ikea.com/pl/pl/p/filfisk-3szt-woreczki-strunowe-wielobarwny-silikon-70514628/
https://www.ikea.com/pl/pl/p/oevermaett-przykrycie-jedzenia-silikon-40497932/
https://www.ikea.com/pl/pl/p/koessebaer-etykiety-zestaw-25-sztuk-bialy-90599264/
https://www.ikea.com/pl/pl/p/koessebaer-stojak-do-suszenia-zywn-z-2-tacami-bambus-80599274/
https://www.ikea.com/pl/pl/p/gullrismott-torebka-do-przechowywania-zywnosci-na-ziemniaki-00581796/
https://www.ikea.com/pl/pl/p/hejne-3-sekcje-polki-miekkie-dr-s99031408/

Binary file not shown.

Binary file not shown.

Binary file not shown.