#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Fetch IKEA category listings and flatten them into JSON and Excel files.

The script reads category URLs from ``leaf_categories.txt`` (one per line,
located next to this file), extracts the numeric category id from each URL,
queries the search endpoint for that category, flattens every product and
its variants into a flat record, and finally writes all records to
``json_raw/flattened_products.json`` and ``json_raw/flattened_products.xlsx``.

Third-party packages (``requests``, ``openpyxl``) are imported at function
scope so that the pure parsing helpers stay importable without them;
``main()`` imports both up front so a missing package still aborts the run
before any work is done.
"""

import datetime
import json
import pathlib
import re

# ──────────────── PATHS ────────────────
BASE_DIR = pathlib.Path(__file__).resolve().parent
CAT_FILE = BASE_DIR / "leaf_categories.txt"  # list of IKEA category URLs
OUT_DIR = BASE_DIR / "json_raw"  # created by main() before anything is written
LOG_FILE = BASE_DIR / "fetch_log.txt"
OUT_JSON = OUT_DIR / "flattened_products.json"
OUT_XLSX = OUT_DIR / "flattened_products.xlsx"

# ──────────────── API ────────────────
SEARCH_URL = "https://sik.search.blue.cdtapps.com/pl/pl/search?c=listaf&v=20250507"
HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Content-Type": "application/json",
}

# A category URL is expected to end in "-<digits>" once trailing slashes are
# removed; the digits are the category id sent to the search endpoint.
_CATEGORY_ID_RE = re.compile(r"-([0-9]+)$")


# ──────────────── HELPERS ────────────────
def log(msg: str) -> None:
    """Print *msg* with a timestamp prefix and append the same line to LOG_FILE."""
    ts = datetime.datetime.now().strftime("[%Y-%m-%d %H:%M:%S] ")
    print(ts + msg)
    with LOG_FILE.open("a", encoding="utf-8") as f:
        f.write(ts + msg + "\n")


def fetch_category_json(category_id: str) -> dict:
    """POST a category search to SEARCH_URL and return the decoded JSON body.

    Args:
        category_id: Category id placed in ``searchParameters.input``.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: If the response status is an error
            (via ``raise_for_status``).
        requests.RequestException: On connection problems or timeout (30 s).
    """
    import requests  # function-scope import, see module docstring

    payload = {
        "searchParameters": {"input": category_id, "type": "CATEGORY"},
        "zip": "05-090",
        "store": "188",
        "isUserLoggedIn": False,
        "optimizely": {
            "listing_3547_filter_hnf_sticky": None,
            "listing_3332_collapsed_filter_bar": None,
            "discount_percentage": None,
            "listing_3790_simplify_rating_stars": None,
        },
        "optimizelyAttributes": {
            "market": "pl",
            "device": "desktop",
            "deviceVendor": "Apple",
            "deviceType": "desktop",
            "isLoggedIn": False,
            "environment": "prod",
            "browser": "Chrome",
            "os": "Mac OS",
            "language": "pl",
            "feedMarket": "pl-PL",
            "locale": "pl-PL",
            "customerType": "guest",
            "isEntranceVisit": False,
            "pip_to_pip_src": "",
        },
        "components": [
            {
                "component": "PRIMARY_AREA",
                "columns": 4,
                "types": {
                    "main": "PRODUCT",
                    "breakouts": ["PLANNER", "LOGIN_REMINDER", "MATTRESS_WARRANTY"],
                },
                "filterConfig": {"max-num-filters": 6},
                # A single window is requested.
                # NOTE(review): a category with more than 1000 results would
                # presumably be truncated — confirm against the API.
                "window": {"size": 1000, "offset": 0},
                "forceFilterCalculation": True,
            }
        ],
    }

    log(f"POST {SEARCH_URL} category_id={category_id}")
    response = requests.post(SEARCH_URL, headers=HEADERS, json=payload, timeout=30)
    log(f"→ Status: {response.status_code}")
    response.raise_for_status()
    return response.json()


def _flatten_product(prod: dict, category_path: str) -> dict:
    """Build one flat output record from a product (or variant) dict.

    Missing keys and explicit ``null`` containers are both treated as absent,
    and absent text fields are reported as ``""``.
    """
    availability = prod.get("availability") or []
    first = availability[0] if len(availability) > 0 else {}
    second = availability[1] if len(availability) > 1 else {}
    current_price = (prod.get("salesPrice") or {}).get("current") or {}
    return {
        "id": prod.get("id") or prod.get("itemNoGlobal"),
        "pipUrl": prod.get("pipUrl", ""),
        "availability_0_status": first.get("status", ""),
        "availability_1_status": second.get("status", ""),
        "availability_1_store": second.get("store", ""),
        "price": current_price.get("wholeNumber", ""),
        "category_path": category_path,
    }


def extract_products(data: dict) -> list[dict]:
    """Extract products and their variants from a search response.

    Walks ``data["results"][*]["items"][*]["product"]``; items without a
    product are skipped. Each product yields one record, followed by one
    record per entry in ``product["gprDescription"]["variants"]``. Variants
    reuse the category path of the product they belong to.

    Args:
        data: Decoded JSON response as returned by ``fetch_category_json``.

    Returns:
        A list of flat records with the keys ``id``, ``pipUrl``,
        ``availability_0_status``, ``availability_1_status``,
        ``availability_1_store``, ``price`` and ``category_path``.
    """
    products = []
    for result in data.get("results") or []:
        for item in result.get("items") or []:
            product = item.get("product")
            if not product:
                continue

            # Category names joined into a single "A / B / C" string.
            category_path = " / ".join(
                c.get("name") or "" for c in product.get("categoryPath") or []
            )

            # The main product first, then its variants.
            products.append(_flatten_product(product, category_path))
            variants = (product.get("gprDescription") or {}).get("variants") or []
            products.extend(_flatten_product(v, category_path) for v in variants)
    return products


def _category_id_from_url(url: str):
    """Return the trailing numeric id of a category URL, or None if absent."""
    match = _CATEGORY_ID_RE.search(url.rstrip("/"))
    return match.group(1) if match else None


# ──────────────── MAIN ────────────────
def main():
    """Fetch every category listed in CAT_FILE and save the flattened records.

    A category that fails (network error, bad status, unexpected payload) is
    logged and skipped so the remaining categories are still processed.
    Nothing is written if no products were collected.
    """
    OUT_DIR.mkdir(exist_ok=True)

    # Import the third-party packages before doing any work so that a missing
    # dependency aborts the run immediately instead of once per category.
    import requests  # noqa: F401
    from openpyxl import Workbook

    if not CAT_FILE.exists():
        print("✖ Файл leaf_categories.txt не найден.")
        return

    categories = [
        line.strip()
        for line in CAT_FILE.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    if not categories:
        print("✖ Нет категорий для обработки.")
        return

    all_products = []
    for idx, url in enumerate(categories, 1):
        log(f"[{idx}/{len(categories)}] {url}")
        cat_id = _category_id_from_url(url)
        if cat_id is None:
            log("⚠️ Не найден ID категории в URL")
            continue

        try:
            data = fetch_category_json(cat_id)
            items = extract_products(data)
        except Exception as e:  # best effort: one bad category must not stop the run
            log(f"❌ Ошибка при категории {cat_id}: {e}")
        else:
            all_products.extend(items)
            log(f"✅ {len(items)} товаров добавлено из категории {cat_id}")

    if not all_products:
        log("⚠️ Нет товаров для сохранения.")
        return

    # Save JSON
    with OUT_JSON.open("w", encoding="utf-8") as f:
        json.dump(all_products, f, ensure_ascii=False, indent=2)
    log(f"💾 JSON сохранён → {OUT_JSON.name} ({len(all_products)} записей)")

    # Save Excel: header row taken from the first record, then one row per record.
    wb = Workbook()
    ws = wb.active
    ws.title = "IKEA_flat"
    headers = list(all_products[0].keys())
    ws.append(headers)
    for row in all_products:
        ws.append([row.get(h, "") for h in headers])
    wb.save(OUT_XLSX)
    log(f"📊 Excel сохранён → {OUT_XLSX.name}")

    log("🎯 Готово.")


if __name__ == "__main__":
    main()