MacOS_Parsers/Pars_Decathlon/xlsx_recorder.py

148 lines
6.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from openpyxl import Workbook
from os.path import isdir, abspath, join
from os import mkdir
import json
import requests
import time
from datetime import datetime
# --- Load the fallback weight table (weight_defaults.json) ---
# The file is opened relative to the current working directory and must hold a
# JSON object: get_weight() reads it with .get(), keyed by the row's
# "Размещение на сайте" value, with "default" as the catch-all entry.
# If the file cannot be read or parsed, or its top-level value is not an
# object, the module falls back to {"default": 2} so it stays importable.
try:
    with open("weight_defaults.json", "r", encoding="utf-8") as f:
        WEIGHT_DEFAULTS = json.load(f)
    if not isinstance(WEIGHT_DEFAULTS, dict):
        # A list/number/string would break every later .get() lookup,
        # so treat it the same way as an unreadable file.
        raise ValueError("weight_defaults.json must contain a JSON object")
except (OSError, ValueError):
    # OSError: missing or unreadable file.
    # ValueError: malformed JSON (json.JSONDecodeError), undecodable bytes
    # (UnicodeDecodeError), or the non-object case raised above.
    print("⚠️ weight_defaults.json не найден — используется default=2")
    WEIGHT_DEFAULTS = {"default": 2}
def get_weight(record):
    """Return the item weight, substituting a fallback from WEIGHT_DEFAULTS.

    The weight is read from the "Параметр: Вес(г)" key of *record* and
    converted to float. If the value is missing, empty, non-numeric, NaN,
    zero or negative, the fallback is looked up in WEIGHT_DEFAULTS by the
    record's "Размещение на сайте" value, then by the "default" key, and
    finally taken as 2.

    Args:
        record: mapping of column name to cell value for one table row.

    Returns:
        The weight as a float.

    Raises:
        ValueError, TypeError: if the fallback stored in WEIGHT_DEFAULTS
            cannot be converted to float.
    """
    try:
        weight = float(record.get("Параметр: Вес(г)", 0) or 0)
    except (ValueError, TypeError):
        weight = 0.0
    # `not weight > 0` instead of `weight <= 0`: every comparison with NaN is
    # False, so a "nan" cell would otherwise slip past the fallback and be
    # returned as the weight.
    if not weight > 0:
        category_name = record.get("Размещение на сайте", "")
        fallback = WEIGHT_DEFAULTS.get(category_name, WEIGHT_DEFAULTS.get("default", 2))
        weight = float(fallback)
    return weight
class Recorder:
    """Persist a parsed table as XLSX, optionally as JSON, and POST it to an API.

    `record()` is the entry point: it always writes `<name>.xlsx`, then (if
    `try_json`) builds a filtered JSON payload, writes `<name>.json` and (if
    `try_send`) posts the payload to `api_url`. Every step is logged to
    stdout and appended to `log.txt` inside the records folder.
    """

    # Row filters for the JSON payload: a row is kept only when
    # MIN_COST <= cost <= MAX_COST and weight <= MAX_WEIGHT.
    MIN_COST = 50
    MAX_COST = 1500
    MAX_WEIGHT = 31
    # "Наличие" must be strictly greater than this for inStock to be True.
    IN_STOCK_THRESHOLD = 2

    # API delivery settings.
    DEFAULT_API_URL = "http://172.25.4.101:3005/parser/data"
    SEND_ATTEMPTS = 3
    # Seconds, passed as `timeout=` to requests.post.
    # NOTE(review): 1 s may be tight for large payloads — confirm with the API owner.
    SEND_TIMEOUT = 1
    # Seconds to wait between two consecutive attempts.
    RETRY_DELAY = 5

    def __init__(self, records_folder="records_folder", try_json=True, try_send=True,
                 api_url=DEFAULT_API_URL):
        """Prepare the output folder and remember the run options.

        Args:
            records_folder: directory for XLSX/JSON/log output; created
                (including missing parents) if it does not exist.
            try_json: when False, `record()` stops after writing the XLSX.
            try_send: when False, the JSON is written but not posted.
            api_url: endpoint that receives the JSON payload.
        """
        # Function-scope import: the module only imports `mkdir` from os.
        from os import makedirs

        self.record_folder = abspath(records_folder)
        # exist_ok avoids the check-then-create race and, unlike mkdir,
        # nested paths whose parents are missing are created too.
        makedirs(self.record_folder, exist_ok=True)

        # flags
        self.try_json = try_json
        self.try_send = try_send
        # API settings
        self.api_url = api_url
        # log file
        self.log_path = join(self.record_folder, "log.txt")

    # --- simple logging helper ---
    def log(self, msg: str):
        """Print *msg* with a timestamp and append the same line to the log file."""
        ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        line = f"[{ts}] {msg}"
        print(line)
        with open(self.log_path, "a", encoding="utf-8") as f:
            f.write(line + "\n")

    @staticmethod
    def _to_float(value, default=0.0):
        """Convert a cell value to float; None/empty count as 0, garbage as *default*."""
        try:
            return float(value or 0)
        except (ValueError, TypeError):
            return default

    def _build_item(self, record):
        """Turn one row mapping into a payload item, or None if it is filtered out."""
        cost = self._to_float(record.get("Цена закупки", 0))
        weight = get_weight(record)
        if not (self.MIN_COST <= cost <= self.MAX_COST and weight <= self.MAX_WEIGHT):
            return None

        brand = record.get("Свойство: Бренд", "") or record.get("Бренд", "")
        category = record.get("Размещение на сайте", "")
        # Non-numeric or missing stock values count as "not in stock".
        in_stock = self._to_float(record.get("Наличие", "")) > self.IN_STOCK_THRESHOLD

        # Cells may hold None, so coalesce to "" before splitting; blank
        # lines are dropped so a row without images yields [] instead of [""].
        images_raw = record.get("Изображения варианта") or record.get("Изображения") or ""
        images = [url.strip() for url in images_raw.split("\n") if url.strip()]

        return {
            "category": {"name": category},
            "brand": {"name": brand},
            "variant": {
                "status_id": 1,
                "color": record.get("Свойство: Цвет", ""),
                "sku": record.get("Артикул", ""),
                "size": record.get("Свойство: Размер", ""),
                "cost": cost,
                "originalUrl": record.get("Краткое описание", ""),
                "originalName": record.get("Название товара или услуги", ""),
                "originalDescription": record.get("Полное описание", ""),
                "originalComposition": record.get("Параметр: Состав", ""),
                "images": images,
                "inStock": in_stock,
                "weight": weight,
            },
        }

    def record(self, csv_name, table_data):
        """Write *table_data* to XLSX and, depending on the flags, to JSON and the API.

        Args:
            csv_name: base file name (without extension) for the outputs.
            table_data: sequence of rows; the first row holds the column headers.
        """
        # === 1. Save XLSX ===
        workbook = Workbook()
        worksheet = workbook.active
        for row in table_data:
            worksheet.append(row)
        xlsx_path = join(self.record_folder, f"{csv_name}.xlsx")
        workbook.save(xlsx_path)
        self.log(f"✅ XLSX saved → {xlsx_path}")

        # === 2. JSON ===
        if not self.try_json:
            return  # the caller chose not to produce JSON
        if len(table_data) == 0:
            # Without a header row there is nothing to map; bail out instead
            # of failing with IndexError on table_data[0].
            self.log(f"⚠️ [{csv_name}] table is empty — JSON skipped")
            return

        headers = table_data[0]
        json_items = []
        filtered_out = 0
        for row in table_data[1:]:
            item = self._build_item(dict(zip(headers, row)))
            if item is None:
                filtered_out += 1
            else:
                json_items.append(item)

        json_data = {"parserName": "decathlon", "items": json_items}
        json_path = join(self.record_folder, f"{csv_name}.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(json_data, f, ensure_ascii=False, indent=2)
        self.log(
            f"✅ JSON saved → {json_path} | items: {len(json_items)} | filtered out: {filtered_out}"
        )

        # === 3. Send JSON ===
        if self.try_send:
            self.send_json_to_api(json_data, csv_name)

    def send_json_to_api(self, json_data, csv_name):
        """POST *json_data* to the API, retrying up to SEND_ATTEMPTS times.

        Only HTTP 200 counts as success. Failures are logged, never raised.
        """
        total_items = len(json_data.get("items", []))
        for attempt in range(1, self.SEND_ATTEMPTS + 1):
            try:
                resp = requests.post(self.api_url, json=json_data, timeout=self.SEND_TIMEOUT)
                if resp.status_code == 200:
                    self.log(f"✅ [{csv_name}] JSON sent to API successfully (attempt {attempt}) | items: {total_items}")
                    return
                self.log(f"⚠️ [{csv_name}] API response {resp.status_code}: {resp.text}")
            except Exception as e:
                # Deliberate best-effort boundary: a delivery problem must not
                # abort the parser run, so it is logged and retried.
                self.log(f"❌ [{csv_name}] Error sending to API (attempt {attempt}): {e}")
            # Pause only between attempts, not after the last failure.
            if attempt < self.SEND_ATTEMPTS:
                time.sleep(self.RETRY_DELAY)
        self.log(f"🚫 [{csv_name}] Failed to send JSON after {self.SEND_ATTEMPTS} attempts.")