MacOS_Parsers/Parsing ZARAHOME/src/xlsx_recorder.py
2025-08-05 14:49:50 +03:00

106 lines
5.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# xlsx_recorder.py · extended with JSON export and optional POST upload
from openpyxl import Workbook
from os.path import isdir, abspath, join
from os import mkdir
import re, json, math, logging, requests, os
# Module-level logger used for the "saved"/"POST" status messages below.
log = logging.getLogger("recorder")
# ─────────────────────── settings ───────────────────────
SEND_JSON = False # when true, Recorder.record() POSTs the payload to POST_URL
SAVE_JSON = True # when true, Recorder.record() also writes a <name>.json copy
# Endpoint that receives the JSON payload when SEND_JSON is enabled.
POST_URL = "http://localhost:3005/parser/data"
# Characters that are replaced when a name is turned into a file name.
INVALID_CHARS = r'[<>:"/\\|*?]'


def sanitize_filename(name: str, repl: str = "_") -> str:
    """Return *name* made safe for use as a file name.

    Everything from the first ``?`` onwards is dropped (presumably a URL
    query string), every remaining character matched by ``INVALID_CHARS`` is
    replaced with *repl*, and surrounding whitespace is stripped.

    Args:
        name: Raw name to clean up.
        repl: Replacement for each forbidden character.

    Returns:
        The cleaned name; may be empty if nothing usable remains.
    """
    # Truncate BEFORE substituting: "?" is itself listed in INVALID_CHARS, so
    # looking for it after the substitution could never find anything.
    stem = name.partition("?")[0]
    return re.sub(INVALID_CHARS, repl, stem).strip()
# object that saves the parsed categories
class Recorder:
    """Save a parsed category table as XLSX and as a JSON payload.

    Every call to :meth:`record` writes ``<name>.xlsx`` into the records
    folder, converts the data rows into the JSON payload, stores a
    ``<name>.json`` copy when ``SAVE_JSON`` is true and POSTs the payload to
    ``POST_URL`` when ``SEND_JSON`` is true.
    """

    # Prefix removed from the "Размещение на сайте" cell to get the category name.
    _CATEGORY_PREFIX = "Каталог/ZaraHome/WOMEN/"
    # Columns concatenated (in this order) into "originalDescription".
    _DESCRIPTION_COLUMNS = (
        "Полное описание",
        "Параметр: Уход",
        "Параметр: Происхождение",
    )

    def __init__(self, records_folder="records_folder"):
        """Remember the absolute output folder, creating it if necessary.

        Args:
            records_folder: Directory (relative or absolute) for output files.
        """
        rf_abs = abspath(records_folder)
        # makedirs also creates missing parent directories and does not fail
        # when the folder already exists (no check-then-create race).
        os.makedirs(rf_abs, exist_ok=True)
        self.record_folder = rf_abs

    @staticmethod
    def _br(text):
        """Return *text* with newlines replaced by ``<br/>`` tags."""
        return text.replace("\n", "<br/>")

    @staticmethod
    def _trim_br(text):
        """Remove whole leading and trailing ``<br/>`` tags from *text*."""
        # str.strip("<br/>") would strip the individual characters
        # '<', 'b', 'r', '/', '>' and eat real text such as a trailing "r".
        return re.sub(r"\A(?:<br/>)+|(?:<br/>)+\Z", "", text)

    @classmethod
    def _build_item(cls, row, idx):
        """Convert one table row into a payload item.

        Args:
            row: Sequence of cell values for one product variant.
            idx: Mapping of column header -> position in *row*.

        Returns:
            A dict with the ``category``, ``variant`` and ``brand`` entries.

        Raises:
            KeyError: If a required column header is missing from *idx*.
            ValueError: If the price or weight cell is not numeric.
        """

        def cell(column):
            return row[idx[column]]

        # basic fields
        article = cell("Артикул")
        partnumber = cell("PartNumber")
        size_full = cls._br(cell("Свойство: Размер"))
        price_int = math.ceil(float(cell("Цена закупки")))  # rounded up
        clr_name = cell("Свойство: Цвет").capitalize()
        vis = cell("Наличие на сайте")
        weight_raw = cell("Свойство: Вес(г)")
        weight_gram = float(weight_raw) if weight_raw else 0.0
        # grams -> kilograms, rounded up to a whole number; empty weight -> 0
        weight_kg = math.ceil(weight_gram / 1000) if weight_gram else 0

        description = "<br/>".join(
            cls._br(cell(column)) for column in cls._DESCRIPTION_COLUMNS
        )

        variant = {
            "status_id": 1,
            "color": clr_name,
            "sku": f"{article}-{partnumber}",
            "size": size_full,
            "cost": price_int,
            # per the original note, this column holds the full URL in the table
            "originalUrl": cell("Краткое описание"),
            "originalName": cell("Название товара или услуги").capitalize(),
            "originalDescription": cls._trim_br(description),
            "originalComposition": cls._br(cell("Параметр: Состав")),
            # one image per line; blank lines are skipped
            "images": [img for img in cell("Изображения варианта").split("\n") if img],
            "inStock": vis == "SHOW",
            "weight": weight_kg,
        }

        cat_raw = cell("Размещение на сайте").replace(cls._CATEGORY_PREFIX, "")
        # collapse every run of characters other than word chars, "/" and "-"
        # (and every run of underscores) into a single "_"
        category_name = re.sub(r"[^\w/-]+|_+", "_", cat_raw)

        return {
            "category": {"name": category_name},
            "variant": variant,
            "brand": {"name": "zara-home"},
        }

    def record(self, csv_name, table):
        """Write *table* to XLSX and emit the matching JSON payload.

        Args:
            csv_name: Base name for the output files; passed through
                ``sanitize_filename`` before use.
            table: Sequence of rows; the first row holds the column headers,
                the remaining rows hold one product variant each.

        Raises:
            IndexError: If *table* is empty (there is no header row).
            KeyError: If a required column header is missing.
            ValueError: If a price or weight cell is not numeric.
        """
        csv_name = sanitize_filename(csv_name)

        # ─── 1) save the XLSX ──────────────────────────────
        wb = Workbook()
        ws = wb.active
        for row in table:
            ws.append(row)
        xlsx_path = join(self.record_folder, f"{csv_name}.xlsx")
        wb.save(xlsx_path)
        log.info("XLSX saved → %s", xlsx_path)

        # ─── 2) build the JSON payload ─────────────────────
        headers = table[0]
        idx = {h: i for i, h in enumerate(headers)}
        items = [self._build_item(row, idx) for row in table[1:]]
        payload = {"items": items, "parserName": "zara-home"}

        # ─── 3) store a JSON copy if enabled ───────────────
        if SAVE_JSON:
            json_path = join(self.record_folder, f"{csv_name}.json")
            with open(json_path, "w", encoding="utf-8") as fh:
                json.dump(payload, fh, ensure_ascii=False, indent=2)
            log.info("JSON saved → %s", json_path)

        # ─── 4) POST to the local service if enabled ───────
        if SEND_JSON:
            try:
                resp = requests.post(POST_URL, json=payload, timeout=20)
                resp.raise_for_status()
                log.info("POST %s OK (%s items)", csv_name, len(items))
            except Exception as err:
                # best-effort upload: a failure is logged, never propagated
                log.warning("POST %s FAILED: %s", csv_name, err)