#!/usr/bin/env python3
# -*- coding: utf-8 -*-
|
||
|
||
import json
|
||
import requests
|
||
import datetime
|
||
import pathlib
|
||
import re
|
||
|
||
# ──────────────── ПУТИ ────────────────
|
||
# All paths are anchored to the directory containing this script, so they do
# not depend on the current working directory.
BASE_DIR = pathlib.Path(__file__).resolve().parent
CAT_FILE = BASE_DIR.joinpath("leaf_categories.txt")  # list of IKEA categories

# Output directory for the downloaded JSON; created at import time if missing.
OUT_DIR = BASE_DIR.joinpath("json_raw")
OUT_DIR.mkdir(exist_ok=True)

# Plain-text log that log() appends to.
LOG_FILE = BASE_DIR.joinpath("fetch_log.txt")
|
||
|
||
# ──────────────── API ────────────────
|
||
# Search endpoint that fetch_category_json() POSTs to.
SEARCH_URL = "https://sik.search.blue.cdtapps.com/pl/pl/search?c=listaf&v=20250507"

# Headers sent with every request; the body is JSON.
HEADERS = {"User-Agent": "Mozilla/5.0", "Content-Type": "application/json"}
|
||
|
||
# ──────────────── ВСПОМОГАТЕЛЬНОЕ ────────────────
|
||
def log(msg: str):
    """Print a timestamped message and append the same line to LOG_FILE.

    Args:
        msg: Text to report. It is prefixed with the current local time
            formatted as ``[YYYY-MM-DD HH:MM:SS] ``.
    """
    stamp = datetime.datetime.now().strftime("[%Y-%m-%d %H:%M:%S] ")
    line = stamp + msg
    print(line)
    # The file is reopened in append mode on every call, so each message is
    # written out and the handle closed before the function returns.
    with LOG_FILE.open("a", encoding="utf-8") as log_file:
        log_file.write(line + "\n")
|
||
|
||
def fetch_category_json(category_id: str) -> dict:
    """POST a category search to the IKEA API and return the decoded JSON.

    Args:
        category_id: Category identifier sent as the search input with
            type ``CATEGORY``.

    Returns:
        The response body as parsed by ``Response.json()``.

    Raises:
        requests.HTTPError: If ``raise_for_status()`` rejects the response
            status. Connection errors, timeouts and JSON decoding errors
            raised by ``requests`` are not caught here either.
    """
    # NOTE(review): the flag and attribute values below look like they were
    # copied from a desktop browser session on the Polish site; confirm which
    # of them the API actually requires.
    experiment_flags = {
        "listing_3547_filter_hnf_sticky": None,
        "listing_3332_collapsed_filter_bar": None,
        "discount_percentage": None,
        "listing_3790_simplify_rating_stars": None,
    }
    client_attributes = {
        "market": "pl",
        "device": "desktop",
        "deviceVendor": "Apple",
        "deviceType": "desktop",
        "isLoggedIn": False,
        "environment": "prod",
        "browser": "Chrome",
        "os": "Mac OS",
        "language": "pl",
        "feedMarket": "pl-PL",
        "locale": "pl-PL",
        "customerType": "guest",
        "isEntranceVisit": False,
        "pip_to_pip_src": "",
    }
    primary_area = {
        "component": "PRIMARY_AREA",
        "columns": 4,
        "types": {
            "main": "PRODUCT",
            "breakouts": ["PLANNER", "LOGIN_REMINDER", "MATTRESS_WARRANTY"],
        },
        "filterConfig": {"max-num-filters": 6},
        # Presumably requests up to 1000 items starting from the first one in
        # a single response; verify against the API's paging behaviour.
        "window": {"size": 1000, "offset": 0},
        "forceFilterCalculation": True,
    }
    payload = {
        "searchParameters": {"input": category_id, "type": "CATEGORY"},
        "zip": "05-090",
        "store": "188",
        "isUserLoggedIn": False,
        "optimizely": experiment_flags,
        "optimizelyAttributes": client_attributes,
        "components": [primary_area],
    }

    log(f"POST {SEARCH_URL} category_id={category_id}")
    response = requests.post(SEARCH_URL, headers=HEADERS, json=payload, timeout=30)
    # The status is logged before raise_for_status() so failed requests still
    # leave their status code in the log.
    log(f"→ Status: {response.status_code}")
    response.raise_for_status()
    return response.json()
|
||
|
||
# ──────────────── MAIN ────────────────
|
||
def main():
    """Fetch and save the search JSON for every category listed in CAT_FILE.

    Each non-blank line of CAT_FILE is treated as a category URL. The digits
    that follow the last ``-`` at the end of the URL (trailing slashes
    ignored) are used as the category id. For every id the API response is
    written to OUT_DIR as ``cat_<id>_<YYYYmmdd_HHMMSS>.json``.

    A line without a recognisable id is logged and skipped. Any exception
    raised while fetching or saving one category is logged and the loop moves
    on to the next one. If CAT_FILE is missing or has no entries, a message
    is printed and the function returns without doing anything.
    """
    if not CAT_FILE.exists():
        print("✖ Файл leaf_categories.txt не найден.")
        return

    raw_lines = CAT_FILE.read_text(encoding="utf-8").splitlines()
    categories = [entry for entry in map(str.strip, raw_lines) if entry]
    if not categories:
        print("✖ Нет категорий для обработки.")
        return

    total = len(categories)
    id_pattern = re.compile(r"-([0-9]+)/?$")
    for position, url in enumerate(categories, start=1):
        log(f"[{position}/{total}] {url}")

        match = id_pattern.search(url.rstrip("/"))
        if match is None:
            log("⚠️ Не найден ID категории в URL")
            continue

        cat_id = match.group(1)
        try:
            data = fetch_category_json(cat_id)
            # The timestamp keeps files from separate runs apart.
            stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            fpath = OUT_DIR / f"cat_{cat_id}_{stamp}.json"
            with fpath.open("w", encoding="utf-8") as out:
                json.dump(data, out, ensure_ascii=False, indent=2)
            log(f"✅ JSON сохранён: {fpath.name}")
        except Exception as exc:
            # Best effort: one failing category must not abort the whole run.
            log(f"❌ Ошибка при категории {cat_id}: {exc}")

    log("🎯 Готово.")
|
||
|
||
# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|