MacOS_Parsers/Парсер_IKEA/ikea_collect_product_linksANDinfo-fullJSON.py
2025-10-07 14:17:12 +03:00

112 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import requests
import datetime
import pathlib
import re
# ──────────────── PATHS ────────────────
# Every path below is anchored to the directory that contains this script.
BASE_DIR = pathlib.Path(__file__).resolve().parent
CAT_FILE = BASE_DIR.joinpath("leaf_categories.txt")  # list of IKEA categories
OUT_DIR = BASE_DIR.joinpath("json_raw")  # one JSON file per fetched category
LOG_FILE = BASE_DIR.joinpath("fetch_log.txt")  # append-only run log

# Created at import time so later writes into OUT_DIR do not fail on a
# missing directory; an already existing directory is accepted.
OUT_DIR.mkdir(exist_ok=True)
# ──────────────── API ────────────────
# Endpoint that fetch_category_json() POSTs its search payload to.
SEARCH_URL = "https://sik.search.blue.cdtapps.com/pl/pl/search?c=listaf&v=20250507"

# Headers sent with every request; the body is JSON.
HEADERS = {"User-Agent": "Mozilla/5.0", "Content-Type": "application/json"}
# ──────────────── HELPERS ────────────────
def log(msg: str):
    """Print *msg* with a timestamp prefix and append the same line to LOG_FILE.

    The prefix is the current local time formatted as
    ``[YYYY-MM-DD HH:MM:SS] ``.  The log file is opened in append mode with
    UTF-8 encoding on every call and closed again before returning.
    """
    prefix = datetime.datetime.now().strftime("[%Y-%m-%d %H:%M:%S] ")
    line = prefix + msg
    print(line)
    with LOG_FILE.open("a", encoding="utf-8") as handle:
        handle.write(line + "\n")
def fetch_category_json(category_id: str) -> dict:
    """POST a category search request to SEARCH_URL and return the decoded JSON.

    The payload asks for a single ``PRIMARY_AREA`` component whose window has
    size 1000 and offset 0, so one call returns one window of results.
    The request and the resulting HTTP status code are both written via
    ``log``.

    Args:
        category_id: Sent as ``searchParameters.input`` with type
            ``"CATEGORY"``.

    Returns:
        The response body as decoded by ``Response.json()``.

    Raises:
        Whatever ``requests.post`` raises on a transport failure or when the
        30-second timeout expires, and whatever
        ``Response.raise_for_status()`` raises for an error status.
    """
    # Experiment flags, all sent as JSON null.
    experiment_flags = {
        "listing_3547_filter_hnf_sticky": None,
        "listing_3332_collapsed_filter_bar": None,
        "discount_percentage": None,
        "listing_3790_simplify_rating_stars": None,
    }

    # Client description sent alongside the experiment flags.
    client_attributes = {
        "market": "pl",
        "device": "desktop",
        "deviceVendor": "Apple",
        "deviceType": "desktop",
        "isLoggedIn": False,
        "environment": "prod",
        "browser": "Chrome",
        "os": "Mac OS",
        "language": "pl",
        "feedMarket": "pl-PL",
        "locale": "pl-PL",
        "customerType": "guest",
        "isEntranceVisit": False,
        "pip_to_pip_src": "",
    }

    # The only requested component: the product listing itself.
    # NOTE(review): only the window at offset 0 is requested; presumably a
    # category with more than 1000 items would be truncated - confirm.
    primary_area = {
        "component": "PRIMARY_AREA",
        "columns": 4,
        "types": {
            "main": "PRODUCT",
            "breakouts": ["PLANNER", "LOGIN_REMINDER", "MATTRESS_WARRANTY"],
        },
        "filterConfig": {"max-num-filters": 6},
        "window": {"size": 1000, "offset": 0},
        "forceFilterCalculation": True,
    }

    payload = {
        "searchParameters": {"input": category_id, "type": "CATEGORY"},
        "zip": "05-090",
        "store": "188",
        "isUserLoggedIn": False,
        "optimizely": experiment_flags,
        "optimizelyAttributes": client_attributes,
        "components": [primary_area],
    }

    log(f"POST {SEARCH_URL} category_id={category_id}")
    response = requests.post(
        SEARCH_URL,
        headers=HEADERS,
        json=payload,
        timeout=30,
    )
    log(f"→ Status: {response.status_code}")
    # Fail before decoding the body when the server reported an error status.
    response.raise_for_status()
    return response.json()
# ──────────────── MAIN ────────────────
def main():
    """Fetch and store the search JSON for every category URL in CAT_FILE.

    Each non-blank line of CAT_FILE is treated as a category URL.  The
    category ID is the run of digits that follows the last ``-`` at the end
    of the URL (trailing slashes are ignored).  For every URL with such an
    ID the JSON returned by ``fetch_category_json`` is written to
    ``OUT_DIR / "cat_<id>_<YYYYmmdd_HHMMSS>.json"``.

    A missing or empty CAT_FILE is reported on stdout and ends the run.
    A URL without a numeric ID is logged and skipped.  Any exception raised
    while fetching or saving one category is logged, and processing
    continues with the next URL.
    """
    if not CAT_FILE.exists():
        print("✖ Файл leaf_categories.txt не найден.")
        return

    raw_lines = CAT_FILE.read_text(encoding="utf-8").splitlines()
    categories = [entry for entry in map(str.strip, raw_lines) if entry]
    if not categories:
        print("✖ Нет категорий для обработки.")
        return

    total = len(categories)
    # Trailing "-<digits>" segment of the URL carries the category ID.
    id_pattern = re.compile(r"-([0-9]+)/?$")

    for position, url in enumerate(categories, start=1):
        log(f"[{position}/{total}] {url}")

        found = id_pattern.search(url.rstrip("/"))
        if found is None:
            log("⚠️ Не найден ID категории в URL")
            continue
        cat_id = found.group(1)

        # Best effort per category: a failure is logged and the loop goes on.
        try:
            data = fetch_category_json(cat_id)
            stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
            target = OUT_DIR / f"cat_{cat_id}_{stamp}.json"
            with target.open("w", encoding="utf-8") as sink:
                json.dump(data, sink, ensure_ascii=False, indent=2)
            log(f"✅ JSON сохранён: {target.name}")
        except Exception as e:
            log(f"❌ Ошибка при категории {cat_id}: {e}")

    log("🎯 Готово.")


if __name__ == "__main__":
    main()