# MacOS_Parsers/Parsing ZARAHOME/src/extractor вывод всего содержимого json в файлах.py
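# Extracts product data from several e-commerce sites (Zara Home, Zara, Chanel,
# Decathlon, eobuwie) into CSV-ready tables. The site-specific extract method and
# any extra XPath-driven columns are read from parse_settings.json; network access
# goes through the injected `parser` object and output through `recorder`.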
from json import load, loads
from os.path import abspath
from bs4 import BeautifulSoup
from lxml import etree
from re import split, search, sub
import json
import textwrap
import time
from pathlib import Path, PurePath
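# Flattens the "parts" block of a composition JSON (areas → components, or
# components directly) into human-readable lines such as "80% cotton".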
def extract_components_zarahome(parts):
composition = []
for part in parts:
if part.get("areas") and part.get("description"):
if len(parts) != 1:
composition.append(part["description"])
for area in part["areas"]:
area_name = area["description"]
percentage_area = area["percentageArea"]
composition.append(f"{area_name} ({percentage_area})")
for component in area["components"]:
material = component["material"]
percentage = component["percentage"]
composition.append(f"{percentage} {material}")
elif part.get("components") and part.get("description"):
if len(parts) != 1:
composition.append(part["description"])
for component in part["components"]:
material = component["material"]
percentage = component["percentage"]
composition.append(f"{percentage} {material}")
return composition
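# A small illustrative example (values are made up, shapes follow the function above):
#
# >>> extract_components_zarahome([{
# ...     "description": "MAIN FABRIC",
# ...     "components": [{"material": "cotton", "percentage": "80%"},
# ...                    {"material": "linen", "percentage": "20%"}],
# ... }])
# ['80% cotton', '20% linen']
#
# With more than one part the part description is emitted first, and "areas"
# entries add an "AREA NAME (percentageArea)" line before their components.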
# Class that extracts the required fields for each supported site and builds the output rows
class Extractor:
def __init__(self, json_data):
self.methods = {
"": (self.default_extract_method, []),
"zarahome": (self.zarahome_extract_method, [
"Краткое описание",
"Артикул",
"Название товара или услуги",
"Полное описание",
"Образец цвета",
"Свойство: Цвет",
"Свойство: Размер",
"Цена закупки",
"Свойство: Вес(г)",
"Изображения",
"Изображения варианта",
"Параметр: Состав",
"Параметр: Уход",
"Параметр: Происхождение",
"Размещение на сайте",
"Свойство: Бренд"
]),
"eobuwie": (self.eobuwie_extract_method, [
"Краткое описание",
"Артикул",
"Свойство: Размер",
"Полное описание(Таблица)",
"Название товара или услуги",
"Изображения",
"Размещение на сайте",
"Цена",
"Наличие"
]),
"decathlon": (self.decathlon_extract_method, [
"Краткое описание",
"Артикул",
"Название товара или услуги",
"Полное описание",
"Наличие",
"Свойство: Цвет",
"Свойство: Размер",
"Цена закупки",
"Параметр: Вес(г)",
"Изображения варианта",
"Размещение на сайте"
]),
"zara": (self.zara_extract_method, [
"Краткое описание",
"Артикул",
"Название товара или услуги",
"Наличие",
"Образец цвета",
"Свойство: Цвет",
"Свойство: Размер",
"Цена закупки",
"Изображения",
"Параметр: Состав",
"Параметр: Уход",
"Параметр: Происхождение",
"Размещение на сайте",
"Свойство: Бренд"
]),
"chanel": (self.chanel_extract_method, [
"Краткое описание",
"Артикул",
"Наличие",
"Свойство: Цвет",
"Свойство: Размер",
"Цена закупки",
"Изображения",
"Размещение на сайте",
"Свойство: Бренд"
])
}
self.method = json_data["method"]
self.tags = json_data["tags"]
self.headers = self.methods[self.method][1]
for tag in self.tags:
self.headers.insert(tag["column_number"], tag["column_name"])
def extract(self, parser, recorder, categories):
self.methods[self.method][0](parser, recorder, categories)
def default_extract_method(self, parser, recorder, categories):
# No-op fallback used when the "method" value in the settings file is empty.
pass
def tags_extract(self, soup, row):
dom_tree = etree.HTML(str(soup))
for tag in self.tags:
xpath_result = dom_tree.xpath(tag["xpath"])
column_data = ""
if len(xpath_result):
for element in xpath_result:
column_data = ''.join(element.itertext()).strip() + "\n"
row.insert(tag["column_number"], column_data)
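# Chanel: walks a category through its paginated AJAX listing, follows every colour
# variant of each product, then pulls stock/colour/size/price from the per-article
# /pl/yapi/product/<article> JSON endpoint and appends one row per variant page.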
def chanel_extract_method(self, parser, recorder, categories):
BASE_URL = "https://www.chanel.com"
for i, category in enumerate(categories):
table = [self.headers]
print(f"Categories: {i + 1} / {len(categories)}", category)
continue_loop = True
category_page = 1
request_elements_count = 24
product_number = 1
category_pattern = r"\/pl\/[\w\d]+\/"
location = "chanel/" + search(category_pattern, category)[0].replace("pl", "").replace("/", "")
while continue_loop:
category_data = parser.parse(f"{category}?requestType=ajax&page={category_page}&totalElementsCount={request_elements_count}", return_type="json")
if not category_data["next"]:
continue_loop = False
products_count = category_data["totalProducts"]
for product in category_data["dataLayer"]["productList"].values():
first_variant = True
article_pattern = r"\/p\/[\d\w]+/"
base_link = BASE_URL + product["quickviewPopin"]["page"]
print(f"Products: {product_number} / {products_count}", base_link)
product_number += 1
links = [base_link]
while len(links):
product_url = links.pop(0)
product_page = parser.parse(product_url)
if product_page is None:
continue
soup = BeautifulSoup(product_page, "html.parser")
if first_variant:
first_variant = False
variants_links = soup.select(".link.js-tabpanel-anchor")
replace_pattern = r"\/p\/.+$"
for variant_link in variants_links:
article = variant_link.get("data-value")
if article not in product_url:
links.append(sub(replace_pattern, f"/p/{article}", product_url))
product_url = soup.select("[property=\"og:url\"]")[0].get("content")
article = search(article_pattern, product_url)[0].replace("/", "").replace("p", "")
product_info = parser.parse(f"{BASE_URL}/pl/yapi/product/{article}?options=basic,vto,variants,stock&site=chanel", return_type="json")
stock = 0
if product_info["stock"]["stockLevel"] == "IN_STOCK":
stock = 1
product_color_name = product_info["color"]["name"]
product_size = product_info.get("size")
product_price = product_info["buyNow"].get("priceValue")
images = "\n".join(map(lambda x: x["url"], product_info["basic"]["images"]))
product_brand = "chanel"
try:
table_data = []
table_data.append([
product_url,
article,
stock,
product_color_name,
product_size,
product_price,
images,
location,
product_brand
])
self.tags_extract(soup, table_data[-1])
table += table_data.copy()
except Exception as error:
print(f"Extractor Error: {error}")
csv_name = category.replace(f"{BASE_URL}/pl/", "").replace("/", "_")
recorder.record(csv_name, table)
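# Zara: resolves the bot-verification redirect on the category page, reads the
# category id from the embedded zara.analyticsData script, fetches the product
# list via the ajax "category/<id>/products" endpoint plus the extra-detail JSON
# (composition / care / origin), then writes one row per listed size.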
def zara_extract_method(self, parser, recorder, categories):
BASE_URL = "https://www.zara.com"
BASE_POLISH_URL = "https://www.zara.com/pl/en/"
for i, category in enumerate(categories):
table = [self.headers]
print(f"Categories: {i + 1} / {len(categories)}", category)
category_page = parser.parse(category)
category_soup = BeautifulSoup(category_page, "html.parser")
verify_url = category_soup.select("[http-equiv=\"refresh\"]")[0].get("content").split("'")[1]
bm_verify = verify_url.split("?")[-1]
category_page = parser.parse(BASE_URL + verify_url)
category_soup = BeautifulSoup(category_page, "html.parser")
tag_script_inner = category_soup.select("[type=\"text/javascript\"][data-compress=\"true\"]")[0].text
analytics_data = loads(search(r"zara\.analyticsData\s?=\s?{.+};", tag_script_inner)[0].split("=")[1].replace(";", ""))
category_id = analytics_data["catGroupId"]
category_products = parser.parse(f"{BASE_POLISH_URL}category/{category_id}/products?ajax=true", return_type="json")
location = "ZARA/" + "/".join(category.split("/")[5].split("-")[:2]).upper()
all_products_count = 0
for element in category_products["productGroups"][0]["elements"]:
products = element.get("commercialComponents")
if not products:
continue
for product in products:
if not product.get("name"):
continue
all_products_count += 1
product_number = 0
for element in category_products["productGroups"][0]["elements"]:
products = element.get("commercialComponents")
if not products:
continue
for product in products:
product_name = product.get("name")
if not product_name:
continue
product_number += 1
seo_keyword = product["seo"]["keyword"]
seo_id = product["seo"]["seoProductId"]
if not seo_keyword:
continue
product_url = f"{BASE_POLISH_URL}{seo_keyword}-p{seo_id}.html"
print(f"Products: {product_number} / {all_products_count}", product_url)
article = product["detail"]["displayReference"]
product_color_hex = product["colorInfo"].get("mainColorHexCode")
product_color_name = product["detail"]["colors"][0]["name"]
product_price = product["price"] / 100
product_brand = product["brand"].get("brandGroupCode")
product_page = parser.parse(f"{product_url}?{bm_verify}")
if product_page is None:
continue
soup = BeautifulSoup(product_page, "html.parser")
sizes = soup.select("[data-qa-action][role=\"option\"]")
images = "\n".join(map(lambda x: x.get("srcset").split(", ")[-1].split(" ")[0], soup.select("source[sizes=\"32vw\"]")))
product_id = product["id"]
extra_data = parser.parse(f"https://www.zara.com/pl/pl/product/{product_id}/extra-detail?ajax=true", return_type="json")
extra_data_extracted = {}
for section in extra_data:
extra_data_extracted[section["sectionType"]] = ""
for component in section["components"]:
if component["datatype"] in ["subtitle", "paragraph"]:
extra_data_extracted[section["sectionType"]] += component["text"]["value"] + "\n"
elif component["datatype"] == "spacer":
extra_data_extracted[section["sectionType"]] += "\n"
elif component["datatype"] == "iconList":
for item in component["items"]:
if item["datatype"] == "iconListItem" and item["description"]["datatype"] == "text":
extra_data_extracted[section["sectionType"]] += item["description"]["value"] + "\n"
materials = extra_data_extracted.get("materials")
care = extra_data_extracted.get("care")
origin = extra_data_extracted.get("origin")
for size in sizes:
try:
table_data = []
if size.get("data-qa-action") == "size-in-stock":
stock = 1
else:
stock = 0
product_size = size.select(".product-size-info__main-label")[0].text
table_data.append([
product_url,
f"{article} - {product_size}",
product_name,
stock,
product_color_hex,
product_color_name,
product_size,
product_price,
images,
materials,
care,
origin,
location,
product_brand
])
self.tags_extract(soup, table_data[-1])
table += table_data.copy()
except Exception as error:
print(f"Extractor Error: {error}")
csv_name = category.split("/")[-1].split("?")[0]
recorder.record(csv_name, table)
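# Decathlon: pages through the category listing, reads each product's __DKT state
# blob and ld+json breadcrumbs, then records one row per SKU with online stock
# levels fetched in bulk from the nfs/stocks/online endpoint.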
def decathlon_extract_method(self, parser, recorder, categories):
BASE_URL = "https://www.decathlon.pl"
for i, category in enumerate(categories):
table = [self.headers]
print(f"Categories: {i + 1} / {len(categories)}", category)
continue_loop = True
category_from = 0
while continue_loop:
category_page = parser.parse(f"{category}?from={category_from}")
category_soup = BeautifulSoup(category_page, "html.parser")
offers_count = int(category_soup.select("h1 ~ span.count")[0].text.split(" ")[0])
products_links = category_soup.select("[class$=\"model-link\"]")
products_links_count = len(products_links)
for e, product_link in enumerate(products_links):
product_url = BASE_URL + product_link.get("href")
print(f"Products: {e + 1 + category_from} / {offers_count}", product_url)
product_page = parser.parse(product_url)
if product_page is None:
continue
soup = BeautifulSoup(product_page, "html.parser")
meta_script_tags = soup.select("[type=\"application/ld+json\"]")
if len(meta_script_tags) <= 1:
continue
meta_data = loads(meta_script_tags[1].text)
path_steps = []
for step in meta_data["itemListElement"]:
path_steps.append(step["item"]["name"])
product_path = "decathlon/" + "/".join(path_steps)
script_json = soup.select("#__dkt")[0]
__dkt = loads(script_json.text.replace("__DKT = ", ""))
if __dkt["_ctx"]["page"]["id"] != "product":
continue
models_data = __dkt["_ctx"]["data"][4]["data"]["models"]
for model in models_data:
color = ""
colors = []
if model.get("colors"):
for color_info in model["colors"]:
colors.append(color_info["label"])
color = " / ".join(colors)
images = []
for image_info in model["images"]["product"]:
images.append(image_info["url"].replace("/250x250", ""))
image_lines = "\n".join(images)
product_name = model["webLabel"]
product_description = soup.select("[id^=\"ProductFunctionalities\"]")
if len(product_description):
product_description = product_description[0].encode_contents()
else:
product_description = ""
skus_data = model["skus"]
sku_ids = []
for sku in skus_data:
sku_ids.append(sku["skuId"])
sku_ids = ",".join(sku_ids)
stocks = parser.parse(f"https://www.decathlon.pl/pl/ajax/nfs/stocks/online?skuIds={sku_ids}", return_type="json")
for sku in skus_data:
try:
sku_id = sku["skuId"]
stock = stocks[sku_id]["stockOnline"] if stocks.get(sku_id) else "unknown"
table_data = []
article = f'{model["modelId"]}-{sku_id}'
size = ""
if sku.get("size"):
size = sku["size"]
price = ""
if sku.get("price"):
price = sku["price"]
weight = ""
if sku.get("grossWeight"):
weight = float(sku["grossWeight"])
table_data.append([
product_url,
article,
product_name,
product_description,
stock,
color,
size,
price,
weight,
image_lines,
product_path
])
self.tags_extract(soup, table_data[-1])
table += table_data.copy()
except Exception as error:
print(f"Extractor Error: {error}")
if offers_count == products_links_count + category_from:
continue_loop = False
else:
category_from += products_links_count
csv_name = "_".join(category.split("/")[4:]).replace(":", "-").replace("?", "_").replace("=", "_")
recorder.record(csv_name, table)
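# eobuwie: queries the t-api search endpoint (100 products per page) filtered by
# the brand and category taken from the category URL, scrapes the product page for
# the name and breadcrumbs, then writes one row per size variant.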
def eobuwie_extract_method(self, parser, recorder, categories):
for i, category in enumerate(categories):
table = [self.headers]
print(f"Categories: {i + 1} / {len(categories)}", category)
category_page = 1
category_marka = category.split(":")[2].split("?")[0]
category_type = category.split("/")[4]
while True:
category_products_data = parser.parse(f"https://eobuwie.com.pl/t-api/rest/search/eobuwie/v5/search?channel=eobuwie&currency=PLN&locale=pl_PL&limit=100&page={category_page}&filters[marka][in][]={category_marka}&categories[]={category_type}&select[]=url_key&select[]=product_group_associated&select[]=images&select[]=final_price&select[]=footwear_size&select_locales[]=pl_PL", return_type="json")
total = category_products_data["total"]
products = category_products_data["products"]
for e, product in enumerate(products):
short_url = product["values"]["url_key"]["value"]["pl_PL"]
product_url = f"https://eobuwie.com.pl/p/{short_url}"
print(f"Products: {e + 1 + ((category_page - 1) * 100)} / {total}", product_url)
product_page = parser.parse(product_url)
if product_page is None:
continue
soup = BeautifulSoup(product_page, "html.parser")
links = soup.select(".breadcrumb-list .text-link")[2:]
product_location = "/".join(list(map(lambda x: x.text, links)))
product_group = ""
if product["values"].get("product_group_associated") and product["values"]["product_group_associated"].get("value"):
product_group = product["values"]["product_group_associated"]["value"]
product_name = soup.select("[data-test-id=\"product-name\"]")[0].text.strip()
product_name = split(r"\d", product_name)[0]
product_name = f"{product_name} - {product_group}"
images_list = []
if product["values"].get("images") and product["values"]["images"].get("value"):
for image in product["values"]["images"]["value"]:
if image.get("url"):
images_list.append(f'https://img.modivo.cloud/eob_product_1800w_1800h({image["url"]}.jpg,webp)')
images_list = "\n".join(images_list)
for variant in product["variants"].values():
try:
table_data = []
size_url = variant["size"]
variant_url = f"{product_url}?size={size_url}"
article = variant["id"]
size_name = ""
if variant["values"].get("footwear_size"):
size_name = variant["values"]["footwear_size"]["value"]["label"]
description = ""
location = f"Каталог/Обувь и аксессуары/{product_location}"
availability = variant["stock_quantity"]
if variant["stock_quantity"]:
price = variant["offers"][0]["final_price"]["amount"]
else:
price = product["values"]["final_price"]["value"]["pl_PL"]["PLN"]["amount"]
table_data.append([
variant_url,
article,
size_name,
description,
product_name,
images_list,
location,
price,
availability
])
self.tags_extract(soup, table_data[-1])
table += table_data.copy()
except Exception as error:
print(f"Extractor Error: {error}")
if category_page * 100 >= total:
break
category_page += 1
csv_name = category.split("/")[-1].replace(":", "-").replace("?", "_").replace("=", "_")
recorder.record(csv_name, table)
# ────────────────────────────────────────────────────────────────
# ZARA HOME — updated method
# ────────────────────────────────────────────────────────────────
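# Zara Home: reads the Angular serverApp-state JSON embedded in the category page,
# detects the category id, then handles two payload layouts: the old one with an
# inline "products" list and the new one that only lists product IDs and requires
# per-ID calls to the productsArray REST endpoint. For every product it emits one
# row per visible colour/size combination.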
def zarahome_extract_method(self, parser, recorder, categories):
BASE_API = "https://www.zarahome.com/itxrest/3/catalog/store/85009924/80290000"
USER_BRAND = "ZARAHOME"
for i, category in enumerate(categories):
table = [self.headers]
print(f"Categories: {i + 1} / {len(categories)} {category}")
# ── category HTML ────────────────────────────────────────
html = parser.parse(category)
if html is None:
print("Extractor Error: empty category page"); continue
soup = BeautifulSoup(html, "html.parser")
script = soup.select_one("#serverApp-state")
if not script:
print("Extractor Error: script#serverApp-state not found"); continue
try:
state = loads(script.string)
except Exception as e:
print(f"Extractor Error: bad JSON ({e})"); continue
# ── category_id ──────────────────────────────────────────
cdata = state.get("inditex-data", {})
cat_id = (cdata.get("iCategoryId")
or cdata.get("categoryId")
or cdata.get("iCategoryJSON", {}).get("id"))
if not cat_id:
for k in state:
m = search(r"/category/(\d+)/product", k)
if m: cat_id = m.group(1); break
if not cat_id:
print("Extractor Error: cannot detect category_id"); continue
# ── block with the products or just their IDs ───────────
key = next((k for k in state if f"/category/{cat_id}/product" in k), None)
if not key:
print("Extractor Error: products block not found"); continue
prod_block = state[key]
summaries = []
# ★ OLD schema: the JSON already contains ["products"]
if "products" in prod_block:
for grp in prod_block["products"]:
for s in grp["bundleProductSummaries"]:
summaries.append({
"productUrl": s.get("productUrl", ""),
"__full": None, # полного JSON пока нет
"detail": s["detail"] # нужен reference
})
# ★ NEW schema: only product IDs are present, so pull them from the API in batches
else:
ids = (prod_block.get("productIds")
or prod_block.get("sortedProductIds")
or prod_block.get("sortedProductIdsByPricesAsc")
or [])
print(f"→ pulling {len(ids)} products via API")
CHUNK = 1 # products per request; the endpoint accepts a comma-separated list, so this can be raised
for p in range(0, len(ids), CHUNK):
ids_chunk = ",".join(map(str, ids[p:p+CHUNK]))
api = (f"{BASE_API}/productsArray"
f"?languageId=-1&productIds={ids_chunk}&appId=1")
data = parser.parse(api, return_type="json")
# pretty-print the raw response (non-ASCII characters are left unescaped so it stays readable)
print("\n=== RAW API JSON ===")
print(textwrap.indent(json.dumps(data, ensure_ascii=False, indent=2), " "))
print("=== END ===\n")
# Dump the raw API response to a debug file
fname = PurePath(api).parts[-1].split("?")[0] # productsArray
ts = int(time.time())
Path(f"/Users/valis/Yandex.Disk.localized/Python3/Parsing ZARAHOME/src_2024-09-05/records_folderdebug_{fname}_{ts}.json").write_text(
json.dumps(data, ensure_ascii=False, indent=2),
encoding="utf-8"
)
print(f"→ RAW saved to debug_{fname}_{ts}.json")
for prod in data.get("products", []):
summaries.append({
"productUrl": prod.get("productUrl", ""),
"__full": prod # уже полный JSON
})
# ── category path for the output table ──────────────────
cat_json = cdata.get("iCategoryJSON", {})
cat_title = "/".join(cat_json.get("parentNames", []) +
[cat_json.get("name", "")])
cat_path = f"Каталог/ZaraHome/{cat_title}"
seen = set()
for n, summary in enumerate(summaries, 1):
short_url = summary.get("productUrl")
if not short_url or short_url in seen:
continue
seen.add(short_url)
print(f"Products: {n} / {len(summaries)} "
f"https://www.zarahome.com/pl/{short_url}")
# ── fetch the full product JSON ─────────────────────
prod = summary.get("__full")
if prod is None: # old schema
ref_id = summary["detail"]["reference"].split("-")[0]
api = (f"{BASE_API}/productsArray"
f"?languageId=-1&referenceIds={ref_id}&appId=1")
data = parser.parse(api, return_type="json")
if not data or "products" not in data:
print(f"Skip (no data) → {short_url}"); continue
prod = data["products"][0]
det = prod["detail"]
url_full = f"https://www.zarahome.com/pl/en/{prod.get('productUrl','')}"
article = det["displayReference"]
name = prod["name"]
descr = det["longDescription"]
# ── before the "all images" block ────────────────────────────────
print("DETAIL KEYS:", list(det.keys())[:20]) # show the first 20 keys
print(
textwrap.indent(
json.dumps(det, ensure_ascii=False, indent=2), # the full detail JSON
prefix=" " # a little indentation
)
)
# ─────────────────────────────────────────────────────────────────
# ── ALL IMAGES ───────────────────────────────────────────────────
# raw_xmedia → either a list of media sets or None
raw_xmedia = (det.get("xmedia") or
prod.get("xmedia") or
[])
# default_idx → an integer index or None
default_idx = det.get("xmediaDefaultSet")
# build the list of media sets to iterate over
if isinstance(raw_xmedia, list) and raw_xmedia:
if isinstance(default_idx, int):
media_sets = [raw_xmedia[default_idx]] # only the default set
else:
media_sets = raw_xmedia # all sets
elif isinstance(raw_xmedia, dict):
media_sets = [raw_xmedia] # occasionally a dict
else:
media_sets = []
all_imgs = [
f"https://static.zarahome.net/8/photos4{loc['path']}/{m['idMedia']}2.jpg"
for loc in media_sets
for m in loc["xmediaItems"][0]["medias"]
]
all_imgs_s = "\n".join(all_imgs)
colors_list = det.get("colors") or [] # may be an empty list
if not colors_list: # synthesize a pseudo-colour so the size loop below still runs
colors_list = [{
"id": 0,
"name": "DEFAULT",
"image": {"url": ""},
"sizes": [{
"visibilityValue": "SHOW",
"name": "",
"description": "",
"weight": prod.get("weight", ""),
"price": prod.get("price", 0)
}]
}]
# composition
comp_block = det.get("compositionDetail") or \
(colors_list[0].get("compositionDetail") if colors_list else None)
comp_txt = ""
if comp_block and comp_block.get("parts"):
comp_txt = "\n".join(
extract_components_zarahome(comp_block["parts"])
)
# care instructions
care = "\n".join(c["description"] for c in det["care"])
# traceability
trace = ""
if colors_list and colors_list[0].get("traceability"):
trace = "\n".join(
f"{v['name']}\n" + "\n".join(v["country"])
for v in colors_list[0]["traceability"].values()
if isinstance(v, dict) and v.get("country") and v.get("name")
)
# ── colours / sizes ─────────────────────────────────
serial = 0
rows = []
if not colors_list: # the product has no colour variants at all
continue # move on to the next product
for clr in colors_list:
if clr["image"] is None: continue
clr_code = clr.get("id")
clr_name = clr.get("name", "")
# build the swatch image URL defensively: it stays empty if the field is missing
clr_image = ""
if clr.get("image") and clr["image"].get("url"):
clr_image = f"https://static.zarahome.net/8/photos4{clr['image']['url']}_3_1_5.jpg"
# ── IMAGES FOR THIS COLOUR ──────────────────────────────────────
raw_xmedia = (det.get("xmedia") or
prod.get("xmedia") or
[])
default_idx = det.get("xmediaDefaultSet")
if isinstance(raw_xmedia, list) and raw_xmedia:
media_sets = [raw_xmedia[default_idx]] if isinstance(default_idx, int) else raw_xmedia
elif isinstance(raw_xmedia, dict):
media_sets = [raw_xmedia]
else:
media_sets = []
clr_imgs = [
f"https://static.zarahome.net/8/photos4{loc['path']}/{m['idMedia']}2.jpg"
for loc in media_sets
if loc.get("colorCode") == clr_code
for m in loc["xmediaItems"][0]["medias"]
]
clr_imgs_s = "\n".join(clr_imgs)
for size in clr["sizes"]:
if size["visibilityValue"] != "SHOW": continue
suffix = "" if serial == 0 else f"-{serial}"
serial += 1
size_name = size["name"]
size_descr = size["description"]
size_full = f"{size_descr} ({size_name})" if size_descr else size_name
weight = size.get("weight") or prod.get("weight", "")
buy_price = int(size.get("price") or prod.get("price", 0)) / 100
rows.append([
url_full,
f"{article}{suffix}",
name,
descr,
clr_image,
clr_name,
size_full,
buy_price,
weight,
all_imgs_s,
clr_imgs_s,
comp_txt,
care,
trace,
cat_path,
USER_BRAND
])
table += rows
# ── save the category ──────────────────────────────────
csv_name = category.split("/")[-1]
recorder.record(csv_name, table)
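# A minimal sketch of the parse_settings.json shape expected by get_extractor below.
# The values are illustrative; only the keys ("method", "tags" with "column_number",
# "column_name", "xpath") are taken from Extractor.__init__ and tags_extract above:
#
# {
#     "method": "zarahome",
#     "tags": [
#         {"column_number": 3, "column_name": "Extra column", "xpath": "//h1"}
#     ]
# }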
def get_extractor():
with open(abspath("parse_settings.json"), "r", encoding="utf-8") as file:
return Extractor(load(file))
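# A minimal usage sketch. The Parser and Recorder classes live outside this module,
# so the import paths below are hypothetical; only the call signatures
# parser.parse(url, return_type=...) and recorder.record(name, table) are taken from
# the methods above.
#
# if __name__ == "__main__":
#     from parser import Parser        # hypothetical import path
#     from recorder import Recorder    # hypothetical import path
#     extractor = get_extractor()
#     extractor.extract(Parser(), Recorder(),
#                       ["https://www.zarahome.com/pl/en/bedroom-n123"])  # illustrative URL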