#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Download raw category listings from the IKEA search API.

Reads category URLs (one per line) from ``leaf_categories.txt`` located next
to this script, extracts the trailing numeric category ID from each URL,
POSTs a search request for that category and stores the JSON response in
``json_raw/`` under a timestamped file name. Progress is printed and appended
to ``fetch_log.txt``.
"""
import datetime
import json
import pathlib
import re

import requests

# ──────────────── PATHS ────────────────
BASE_DIR = pathlib.Path(__file__).resolve().parent
CAT_FILE = BASE_DIR / "leaf_categories.txt"  # list of IKEA category URLs, one per line
OUT_DIR = BASE_DIR / "json_raw"
OUT_DIR.mkdir(exist_ok=True)
LOG_FILE = BASE_DIR / "fetch_log.txt"

# ──────────────── API ────────────────
SEARCH_URL = "https://sik.search.blue.cdtapps.com/pl/pl/search?c=listaf&v=20250507"
HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Content-Type": "application/json",
}

# Matches the numeric ID that terminates a category URL ("...-12345").
# Compiled once because it is applied to every line of the category file.
CATEGORY_ID_RE = re.compile(r"-([0-9]+)/?$")


# ──────────────── HELPERS ────────────────
def log(msg: str) -> None:
    """Print *msg* with a timestamp prefix and append the same line to LOG_FILE."""
    ts = datetime.datetime.now().strftime("[%Y-%m-%d %H:%M:%S] ")
    print(ts + msg)
    with LOG_FILE.open("a", encoding="utf-8") as f:
        f.write(ts + msg + "\n")


def fetch_category_json(category_id: str) -> dict:
    """Send a POST request to the IKEA search API and return the decoded JSON.

    Args:
        category_id: Category identifier placed into ``searchParameters.input``.

    Returns:
        The response body parsed by ``Response.json()``.

    Raises:
        requests.HTTPError: If the response status indicates an error
            (raised by ``raise_for_status``).
        requests.RequestException: For connection problems or when the
            30-second timeout expires.
    """
    payload = {
        "searchParameters": {"input": category_id, "type": "CATEGORY"},
        # NOTE(review): zip/store look like a fixed delivery location and
        # store — confirm these are the intended values for all categories.
        "zip": "05-090",
        "store": "188",
        "isUserLoggedIn": False,
        "optimizely": {
            "listing_3547_filter_hnf_sticky": None,
            "listing_3332_collapsed_filter_bar": None,
            "discount_percentage": None,
            "listing_3790_simplify_rating_stars": None
        },
        "optimizelyAttributes": {
            "market": "pl",
            "device": "desktop",
            "deviceVendor": "Apple",
            "deviceType": "desktop",
            "isLoggedIn": False,
            "environment": "prod",
            "browser": "Chrome",
            "os": "Mac OS",
            "language": "pl",
            "feedMarket": "pl-PL",
            "locale": "pl-PL",
            "customerType": "guest",
            "isEntranceVisit": False,
            "pip_to_pip_src": ""
        },
        "components": [{
            "component": "PRIMARY_AREA",
            "columns": 4,
            "types": {
                "main": "PRODUCT",
                "breakouts": ["PLANNER", "LOGIN_REMINDER", "MATTRESS_WARRANTY"]
            },
            "filterConfig": {"max-num-filters": 6},
            # A single window is requested; this function does not paginate.
            # NOTE(review): presumably categories with more than 1000 items
            # would be truncated — confirm against the API behaviour.
            "window": {"size": 1000, "offset": 0},
            "forceFilterCalculation": True
        }]
    }

    log(f"POST {SEARCH_URL} category_id={category_id}")
    r = requests.post(SEARCH_URL, headers=HEADERS, json=payload, timeout=30)
    # The status is logged before raise_for_status() so failures are recorded too.
    log(f"→ Status: {r.status_code}")
    r.raise_for_status()
    return r.json()


# ──────────────── MAIN ────────────────
def main() -> None:
    """Fetch and save the JSON listing for every category URL in CAT_FILE.

    Lines without a trailing numeric ID are logged and skipped. A failure
    while fetching or saving one category is logged and does not stop the
    processing of the remaining categories.
    """
    if not CAT_FILE.exists():
        print("✖ Файл leaf_categories.txt не найден.")
        return

    categories = [
        line.strip()
        for line in CAT_FILE.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    if not categories:
        print("✖ Нет категорий для обработки.")
        return

    total = len(categories)
    for idx, url in enumerate(categories, 1):
        log(f"[{idx}/{total}] {url}")
        m = CATEGORY_ID_RE.search(url.rstrip("/"))
        if not m:
            log("⚠️ Не найден ID категории в URL")
            continue

        cat_id = m.group(1)
        try:
            data = fetch_category_json(cat_id)
            # The timestamp in the name keeps earlier downloads of the same
            # category from being overwritten.
            fname = f"cat_{cat_id}_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            fpath = OUT_DIR / fname
            with fpath.open("w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            log(f"✅ JSON сохранён: {fpath.name}")
        except Exception as e:
            # Top-level boundary: record the failure and continue with the
            # next category instead of aborting the whole run.
            log(f"❌ Ошибка при категории {cat_id}: {e}")

    log("🎯 Готово.")


if __name__ == "__main__":
    main()