From 00048eb2bd306e95acbc8b201b77d07a4480af8a Mon Sep 17 00:00:00 2001
From: va1is <va0880lis@gmail.com>
Date: Wed, 1 Oct 2025 16:21:00 +0300
Subject: [PATCH] =?UTF-8?q?IKEA-api-=D0=BF=D0=B5=D1=80=D0=B5=D0=BD=D0=BE?=
 =?UTF-8?q?=D1=81=20=D0=BD=D0=B0=20=D0=BF=D1=80=D0=BE=D0=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Парсер_IKEA/main.py           |  26 +-
 Парсер_IKEA/main0.py          | 723 ++++++++++++++++++++++++++++++++++
 Парсер_IKEA/main_win proxy.py |   2 +-
 3 files changed, 741 insertions(+), 10 deletions(-)
 create mode 100644 Парсер_IKEA/main0.py
diff --git a/Парсер_IKEA/main.py b/Парсер_IKEA/main.py
index fb7d810..960db85 100644
--- a/Парсер_IKEA/main.py
+++ b/Парсер_IKEA/main.py
@@ -1,3 +1,4 @@
+
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
@@ -24,7 +25,7 @@ POST_LOG       = os.path.join(RECORDS_DIR, "post_log.txt")
 
 Ответ: 1 (да) / 0 (нет). Пустой ввод = 1.
 '''
-POST_URL     = os.getenv("IKEA_POST_URL", "http://localhost:3005/parser/data")
+POST_URL     = os.getenv("IKEA_POST_URL", "http://172.25.4.101:3005/parser/data")
 POST_API_KEY = os.getenv("IKEA_POST_API_KEY", "")
 POST_TIMEOUT = 20
 BATCH_SIZE   = 50
@@ -248,7 +249,9 @@ def format_dimensions(raw_dim_props, with_html=False, translated=False):
     br = "<br/>" if with_html else "\n"
 
     title = translate_token("Wymiary") if translated else "Wymiary"
-    lines.append(f"<strong>{title}</strong>" if with_html else title)
+    
+    lines.append(f"{title}" if with_html else title)
+    #lines.append(f"<strong>{title}</strong>" if with_html else title)
 
     for d in raw_dim_props.get("dimensions", []):
         name = d.get("name", "")
@@ -265,7 +268,8 @@ def format_dimensions(raw_dim_props, with_html=False, translated=False):
     pack = (raw_dim_props.get("packaging") or {})
     pack_title = translate_token("Opakowanie") if translated else "Opakowanie"
     lines.append(br if with_html else "")
-    lines.append(f"<strong>{pack_title}</strong>" if with_html else pack_title)
+    lines.append(f"{pack_title}" if with_html else pack_title)
+    #lines.append(f"<strong>{pack_title}</strong>" if with_html else pack_title)
 
     content = (pack.get("contentProps") or {}).get("packages") or []
     for pkg in content:
@@ -322,7 +326,8 @@ def format_product_details(raw_details, add_summary_desc="", with_html=False, sk
         out.append(br if with_html else "")
 
     t1 = "Informacje o produkcie"
-    out.append(f"<strong>{t1}</strong>" if with_html else t1)
+    out.append(f"{t1}" if with_html else t1)
+    #out.append(f"<strong>{t1}</strong>" if with_html else t1)
     pd = (raw_details.get("productDescriptionProps") or {})
     paragraphs = pd.get("paragraphs") or []
     for p in paragraphs:
@@ -343,7 +348,8 @@ def format_product_details(raw_details, add_summary_desc="", with_html=False, sk
     if gk:
         out.append(br if with_html else "")
         t2 = "Dobrze wiedzieć"
-        out.append(f"<strong>{t2}</strong>" if with_html else t2)
+        out.append(f"{t2}" if with_html else t2)
+        #out.append(f"<strong>{t2}</strong>" if with_html else t2)
         for item in gk:
             txt = item.get("text")
             if txt:
@@ -356,7 +362,8 @@ def format_product_details(raw_details, add_summary_desc="", with_html=False, sk
     t3 = "Materiały i pielęgnacja"
     if mats or care:
         out.append(br if with_html else "")
-        out.append(f"<strong>{t3}</strong>" if with_html else t3)
+        out.append(f"{t3}" if with_html else t3)
+        #out.append(f"<strong>{t3}</strong>" if with_html else t3)
 
     if mats:
         out.append("Materiały")
@@ -385,7 +392,8 @@ def format_product_details(raw_details, add_summary_desc="", with_html=False, sk
     if sc:
         out.append(br if with_html else "")
         t4 = "Bezpieczeństwo i zgodność z przepisami"
-        out.append(f"<strong>{t4}</strong>" if with_html else t4)
+        out.append(f"{t4}" if with_html else t4)
+        #out.append(f"<strong>{t4}</strong>" if with_html else t4)
         for s in sc:
             txt = s.get("text")
             if txt:
@@ -597,8 +605,8 @@ def build_variant(row: dict) -> dict:
     }
 
     return {
-        #"category": {"name": category_name},
-        "category": {"name": "TEST/IKEA"},
+        "category": {"name": category_name},
+        #"category": {"name": "TEST/IKEA"},
         "brand":    {"name": "ikea"},
         "variant":  variant,
     }
diff --git a/Парсер_IKEA/main0.py b/Парсер_IKEA/main0.py
new file mode 100644
index 0000000..9fcb4a1
--- /dev/null
+++ b/Парсер_IKEA/main0.py
@@ -0,0 +1,723 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import os, json, re, math, time, html, requests, datetime
+from bs4 import BeautifulSoup
+from openpyxl import Workbook
+
+# ───────────────────────── PATHS / FILES ──────────────────────────
+BASE_DIR       = os.path.dirname(os.path.abspath(__file__))
+RECORDS_DIR    = os.path.join(BASE_DIR, "records_folder")
+os.makedirs(RECORDS_DIR, exist_ok=True)  # import-time side effect: creates the output folder
+
+INPUT_FILE     = os.path.join(BASE_DIR, "product_links.txt")  # product URLs, one per line
+OUTPUT_FILE    = os.path.join(RECORDS_DIR, "records.xlsx")
+DICT_FILE      = os.path.join(BASE_DIR, "dictionary_main.txt")  # translation dictionary
+EXCL_FILE      = os.path.join(BASE_DIR, "exclusion_materials.txt")  # excluded-material tokens
+POST_LOG       = os.path.join(RECORDS_DIR, "post_log.txt")
+
+# ───────────────────────── POST SETTINGS ──────────────────────────
+'''
+На старте спросим:
+- сохранять ли JSON батчи на диск
+- отправлять ли батчи на API
+
+Ответ: 1 (да) / 0 (нет). Пустой ввод = 1.
+'''  # (translation) at startup the script asks whether to save JSON batches to disk and whether to send them to the API; 1 = yes, 0 = no, empty input = 1
+POST_URL     = os.getenv("IKEA_POST_URL", "http://172.25.4.101:3005/parser/data")  # override via env IKEA_POST_URL
+POST_API_KEY = os.getenv("IKEA_POST_API_KEY", "")  # sent as a Bearer token when non-empty
+POST_TIMEOUT = 20  # passed as timeout= to requests.post
+BATCH_SIZE   = 50  # items per JSON/API batch
+
+# ───────────────────────── SITE SETTINGS ──────────────────────────
+HEADERS = {"User-Agent": "Mozilla/5.0"}
+CSS_SELECTOR = ".pip-product__subgrid.product-pip.js-product-pip"  # element that carries data-hydration-props
+
+BLOCKS = [  # top-level keys of the hydration JSON that get flattened into the row
+    "buyModule",
+    "productSummary",
+    "pipPricePackage",
+    "productInformationSection",
+    "keyFacts",
+    "stockcheckSection",
+    "availabilityGroup",
+    "productGallery",
+]
+
+'''
+Whitelist колонок для Excel.
+'''  # (translation) whitelist of Excel columns
+KEEP_COLUMNS = [
+    "availabilityGroup.serverOnlineSellable",
+    "availabilityGroup.storeHeader",
+    "buyModule.onlineSellable",
+    "buyModule.productName",
+    "buyModule.productPrice",
+    "buyModule.productType",
+    "keyFacts.ariaLabels",
+    "keyFacts.gaLabel",
+    "keyFacts.keyFacts",
+    "keyFacts.keyFacts_formatted",
+    "pipPricePackage.measurementText",
+    "pipPricePackage.productDescription",
+    "productGallery.urls",
+    "productInformationSection.dimensionProps",
+    "productInformationSection.dimensionProps_formatted",
+    "productInformationSection.dimensionProps_formatted_html_translated",
+    "productInformationSection.productDetailsProps",
+    "productInformationSection.productDetailsProps_formatted",
+    "productInformationSection.productDetailsProps_formatted_html",
+    "productSummary.description",
+    "productSummary.visibleItemNo",
+    "stockcheckSection.packagingProps",
+    "stockcheckSection.typeName",
+    "total brutto",
+    "prductVariantColorMeasure",  # sic: this spelling is the key used throughout the script
+    "categoryBreadcrumb",
+    "originalName",  # NEW: column for Excel
+    "url",
+]
+
+# ───────────────────────── I/O UTILITIES ──────────────────────────
+def ask_bool(prompt: str, default: str = "1") -> bool:
+    '''Ask a 1/0 question on stdin; empty input or EOF falls back to default.'''
+    question = f"{prompt} (1=yes, 0=no) [{default}]: "
+    try:
+        answer = input(question).strip()
+    except EOFError:
+        answer = ""
+    # An empty answer means "use the default"; only the literal "1" is a yes.
+    return (answer or default) == "1"
+
+def _post_log(msg: str):
+    '''Append one line to post_log.txt; best-effort, any error is swallowed.'''
+    try:
+        with open(POST_LOG, "a", encoding="utf-8") as log_file:
+            log_file.write(f"{msg.rstrip()}\n")
+    except Exception:  # deliberate: a logging failure must never abort the run
+        pass
+
+def _now_tag():  # "YYYYMMDD_HHMMSS" stamp from datetime.now(), used in batch file names
+    return f"{datetime.datetime.now():%Y%m%d_%H%M%S}"
+
+def _save_json_batch(payload: dict, batch_index: int):  # writes one batch to RECORDS_DIR, returns the file path
+    fname = f"ikea_batch_{_now_tag()}_{batch_index:04d}.json"  # timestamp + zero-padded batch number
+    fpath = os.path.join(RECORDS_DIR, fname)
+    with open(fpath, "w", encoding="utf-8") as fh:
+        json.dump(payload, fh, ensure_ascii=False, indent=2)  # keep non-ASCII text readable in the file
+    print(f"💾 JSON saved: {fname}")
+    return fpath
+
+# ───────────────────────── DICTIONARIES / FILTERS ─────────────────
+def load_dictionary(path: str) -> dict:
+    '''
+    Read the translation dictionary and return it as {source: translation}.
+    Entries are "key" : "value" pairs, e.g. "Wymiary" : "Размеры";
+    a missing file yields an empty dict.
+    '''
+    if not os.path.isfile(path):
+        return {}
+    with open(path, "r", encoding="utf-8") as fh:  # close the handle promptly
+        pairs = re.findall(r'"([^"]+)"\s*:\s*"([^"]+)"', fh.read())
+    return dict(pairs)
+
+DICT = load_dictionary(DICT_FILE)  # loaded once, at import time
+
+def translate_token(token: str) -> str:  # tokens missing from DICT are returned unchanged
+    return DICT.get(token, token)
+
+def load_exclusions(path: str) -> set:
+    '''
+    Load exclusion tokens (stripped, lower-cased) from a text file.
+    Double-quoted tokens win if any are present; otherwise the text is
+    split on commas, semicolons and line breaks. Missing file -> empty set.
+    '''
+    if not os.path.isfile(path):
+        return set()
+    with open(path, "r", encoding="utf-8") as fh:  # do not leak the handle
+        txt = fh.read()
+    # Prefer quoted tokens when the file has any.
+    quoted = re.findall(r'"([^"]+)"', txt, flags=re.S)
+    tokens = quoted if quoted else re.split(r"[,;\n\r]+", txt)
+    return {t.strip().lower() for t in tokens if t.strip()}
+
+EXCLUSIONS = load_exclusions(EXCL_FILE)  # loaded once, at import time; tokens are lower-cased
+
+def materials_from_details_json(details: dict) -> list[str]:
+    '''
+    Collect every string stored under a "material" key at any depth.
+    The schema varies, so the whole dict/list tree is walked depth-first,
+    keeping document order, instead of following fixed paths.
+    '''
+    found = []
+    # Stack of (key, node); children are pushed reversed so pops keep their order.
+    pending = [(None, details or {})]
+    while pending:
+        key, node = pending.pop()
+        if key == "material" and isinstance(node, str):
+            found.append(node)
+        elif isinstance(node, dict):
+            pending.extend(reversed(list(node.items())))
+        elif isinstance(node, list):
+            pending.extend((None, item) for item in reversed(node))
+    return found
+
+def materials_match_exclusions(details: dict, exclusion_tokens: set) -> bool:
+    '''
+    Return True when any exclusion token is a substring of the lower-cased
+    material texts found in details; an empty token set never matches.
+    '''
+    if exclusion_tokens:
+        haystack = "\n".join(materials_from_details_json(details)).lower()
+        return any(token in haystack for token in exclusion_tokens)
+    return False
+
+# ───────────────────────── FORMATTERS ─────────────────────────────
+def _parse_json_value(val):
+    '''Decode val when it is a non-blank JSON string; otherwise return it as-is.'''
+    if not isinstance(val, str):
+        # dicts, lists, None and any other non-string pass straight through
+        return val
+    text = val.strip()
+    if text:
+        try:
+            return json.loads(text)
+        except Exception:
+            pass  # not valid JSON -> fall through and hand back the input
+    return val
+
+def flatten_block(block_name, data):
+    '''
+    Flatten one hydration block into {"<block_name>.<key>": value}; the gallery
+    mediaList becomes newline-joined URLs under "productGallery.urls".
+    '''
+    if not isinstance(data, dict):
+        return {}
+    flat = {}
+    for k, v in data.items():
+        if block_name == "productGallery" and k == "mediaList":
+            if isinstance(v, list):
+                contents = [i.get("content") for i in v if isinstance(i, dict)]
+                urls = [c["url"] for c in contents if isinstance(c, dict) and "url" in c]
+                flat["productGallery.urls"] = "\n".join(urls)
+            continue  # was `return flat`, which silently dropped the keys after mediaList
+        flat[f"{block_name}.{k}"] = v
+    return flat
+
+def format_keyfacts(raw_keyfacts):
+    '''
+    Render key facts as text: a header line taken from the first entry's
+    "name" (default "Właściwości") followed by every non-empty "label".
+    Returns "" for non-list input or an empty list.
+    '''
+    if not isinstance(raw_keyfacts, list):
+        return ""
+    # None/empty entries are treated as {} so .get() is always available.
+    entries = [(el or {}) for el in raw_keyfacts]
+    # Only the first entry contributes the header line.
+    lines = [entries[0].get("name", "Właściwości")] if entries else []
+    lines.extend(e.get("label") for e in entries if e.get("label"))
+    return "\n".join(lines)
+
+def _fmt_float(x):  # up to 2 decimals with trailing zeros stripped; "" when float(x) fails
+    try:
+        return format(float(x), ".2f").rstrip("0").rstrip(".")
+    except Exception:
+        return ""
+
+def _collect_packaging_total_kg(packaging):
+    '''
+    Sum weight * quantity over all packages (missing quantity counts as 1); per
+    measurement block only the first "weight"/"Waga" entry counts, if numeric.
+    '''
+    if not isinstance(packaging, dict):
+        return 0.0
+    total = 0.0
+    for pkg in (packaging.get("contentProps") or {}).get("packages") or []:
+        qty = (pkg.get("quantity") or {}).get("value") or 1
+        for block in (b for b in pkg.get("measurements") or [] if isinstance(b, list)):
+            hit = next((m for m in block if m.get("type") == "weight" or m.get("label") == "Waga"), None)
+            if hit and isinstance(hit.get("value"), (int, float)):
+                total += float(hit["value"]) * qty
+    return total
+
+def format_dimensions(raw_dim_props, with_html=False, translated=False):
+    '''
+    Render dimensionProps as text: a "Wymiary" section with "name: measure"
+    lines followed by an "Opakowanie" section describing each package.
+
+    with_html  -- join lines with "<br/>" and wrap section titles in <strong>
+    translated -- pass titles and labels through translate_token()
+    Returns "" when raw_dim_props is not a dict.
+    '''
+    if not isinstance(raw_dim_props, dict):
+        return ""
+    lines = []
+    br = "<br/>" if with_html else "\n"
+    title = translate_token("Wymiary") if translated else "Wymiary"
+    lines.append(f"<strong>{title}</strong>" if with_html else title)
+
+    for d in raw_dim_props.get("dimensions") or []:
+        name = d.get("name", "")
+        meas = d.get("measure", "")
+        if not name and not meas:
+            continue
+        if translated:
+            name = translate_token(name)
+        lines.append(f"{name}: {meas}".strip())
+
+    pack = (raw_dim_props.get("packaging") or {})
+    pack_title = translate_token("Opakowanie") if translated else "Opakowanie"
+    lines.append(br if with_html else "")
+    lines.append(f"<strong>{pack_title}</strong>" if with_html else pack_title)
+
+    content = (pack.get("contentProps") or {}).get("packages") or []
+    for pkg in content:
+        name = pkg.get("name") or ""
+        if name:
+            lines.append(name)
+        art = (pkg.get("articleNumber") or {}).get("value")
+        if art:
+            art_lbl = "Numer artykułu"
+            if translated:
+                art_lbl = translate_token(art_lbl)
+            lines.append(art_lbl)
+            lines.append(f"{art}")
+        ms = pkg.get("measurements") or []
+        for block in ms:
+            if not isinstance(block, list):
+                continue
+            for m in block:
+                lbl = m.get("label", "")
+                txt = m.get("text", "")
+                if translated:
+                    lbl = translate_token(lbl) if lbl else lbl
+                if lbl or txt:
+                    lines.append(f"{lbl}: {txt}".strip(": "))
+        q_val = ((pkg.get("quantity") or {}).get("value"))
+        if q_val:
+            q_lbl = "Paczka(i)"
+            if translated:
+                q_lbl = translate_token(q_lbl)
+            lines.append(f"{q_lbl}: {q_val}")
+
+    if with_html:
+        s = br.join([x for x in lines if x is not None])
+        s = re.sub(r"(" + re.escape(br) + r"){2,}", br*2, s)
+        # Trim whole "<br/>" tokens only: str.strip(br) treats br as a character
+        # set, so it ate the leading "<" of "<strong>" and trailing b/r/>/ chars.
+        edge = re.escape(br)
+        return re.sub(rf"\A(?:{edge})+|(?:{edge})+\Z", "", s)
+    return "\n".join([x for x in lines if x is not None]).strip()
+
+def format_product_details(raw_details, add_summary_desc="", with_html=False, skip_assembly=True):
+    '''
+    Render productDetailsProps as text: optional summary, product information,
+    "good to know", materials and care, safety and compliance sections.
+
+    with_html     -- join lines with "<br/>" and wrap section titles in <strong>
+    skip_assembly -- unused; the assembly-instructions section is disabled
+    Returns add_summary_desc unchanged when raw_details is not a dict.
+    '''
+    if not isinstance(raw_details, dict):
+        return add_summary_desc
+
+    br = "<br/>" if with_html else "\n"
+    out = []
+    if add_summary_desc:
+        out.append(add_summary_desc)
+        out.append(br if with_html else "")
+
+    t1 = "Informacje o produkcie"
+    out.append(f"<strong>{t1}</strong>" if with_html else t1)
+    pd = (raw_details.get("productDescriptionProps") or {})
+    out.extend(pd.get("paragraphs") or [])
+
+    dlabel = pd.get("designerLabel")
+    dname  = pd.get("designerName")
+    if dlabel and dname:
+        out.append(dlabel)
+        out.append(dname)
+
+    if raw_details.get("productId"):
+        out.append("Numer artykułu")
+        out.append(raw_details["productId"])
+
+    acc = (raw_details.get("accordionObject") or {})
+    gk = ((acc.get("goodToKnow") or {}).get("contentProps") or {}).get("goodToKnow") or []
+    if gk:
+        out.append(br if with_html else "")
+        t2 = "Dobrze wiedzieć"
+        out.append(f"<strong>{t2}</strong>" if with_html else t2)
+        for item in gk:
+            txt = item.get("text")
+            if txt:
+                out.append(txt)
+
+    mac = (acc.get("materialsAndCare") or {}).get("contentProps") or {}
+    mats = mac.get("materials") or []
+    care = mac.get("careInstructions") or []
+
+    t3 = "Materiały i pielęgnacja"
+    if mats or care:
+        out.append(br if with_html else "")
+        out.append(f"<strong>{t3}</strong>" if with_html else t3)
+
+    if mats:
+        out.append("Materiały")
+        for m in mats:
+            ptype = m.get("productType", "")
+            for mat in (m.get("materials") or []):
+                material = mat.get("material", "")
+                if ptype:
+                    out.append(ptype)
+                if material:
+                    out.append(material)
+
+    if care:
+        detailsCareText = mac.get("detailsCareText", "Pielęgnacja")
+        out.append(detailsCareText)
+        for c in care:
+            ptype = c.get("productType", "")
+            if ptype:
+                out.append(ptype)
+            out.extend(c.get("texts") or [])
+
+    safety = (raw_details.get("safetyAndCompliance") or {}).get("contentProps") or {}
+    sc = safety.get("safetyAndCompliance") or []
+    if sc:
+        out.append(br if with_html else "")
+        t4 = "Bezpieczeństwo i zgodność z przepisami"
+        out.append(f"<strong>{t4}</strong>" if with_html else t4)
+        for s in sc:
+            txt = s.get("text")
+            if txt:
+                out.append(txt)
+
+    if with_html:
+        s = br.join([x for x in out if x is not None])
+        s = re.sub(r"(" + re.escape(br) + r"){2,}", br*2, s)
+        # Trim whole "<br/>" tokens only: str.strip(br) treats br as a character
+        # set, so it ate the leading "<" of "<strong>" and trailing b/r/>/ chars.
+        edge = re.escape(br)
+        return re.sub(rf"\A(?:{edge})+|(?:{edge})+\Z", "", s)
+    return "\n".join([x for x in out if x is not None]).strip()
+
+def build_variant_color_measure(desc: str, type_name: str, measurement: str) -> str:
+    '''
+    Build "<Variant>, <measurement>": desc with a leading type_name (plus any
+    separators after it) removed and its first character upper-cased.
+    When no letter or digit is left, only the measurement is returned.
+    '''
+    rest = desc or ""
+    type_text = (type_name or "").strip()
+    if type_text:
+        pattern = r"^\s*" + re.escape(type_text) + r"[\s,;:\-–—/]*"
+        rest = re.sub(pattern, "", rest, flags=re.IGNORECASE)
+    has_text = re.search(r"[0-9A-Za-zА-Яа-яЁёÀ-ž]", rest)
+    rest = rest.strip() if has_text else ""
+    meas = (measurement or "").strip()
+    if not rest:
+        return meas
+    rest = rest[:1].upper() + rest[1:]
+    return f"{rest}, {meas}" if meas else rest
+
+# ───────────────────── PRODUCT PAGE SCRAPING ──────────────────────
+def extract_data(url: str) -> dict:
+    '''
+    Fetch one product page and return a flat dict with the KEEP_COLUMNS fields;
+    on failure returns {"url", "error"}. Derived fields: keyFacts_formatted,
+    dimensionProps_formatted, dimensionProps_formatted_html_translated, productDetailsProps_formatted,
+    productDetailsProps_formatted_html, total brutto, prductVariantColorMeasure, categoryBreadcrumb, originalName.
+    '''
+    try:
+        resp = requests.get(url, headers=HEADERS, timeout=15)
+        resp.raise_for_status()
+        # 🔎 DEBUG: print basic information about the response to the console
+        print("\n=== FETCH DEBUG ===")
+        print("URL:        ", url)
+        print("Final URL:  ", resp.url)
+        print("Status:     ", resp.status_code)
+        print("ContentType:", resp.headers.get("Content-Type"))
+        print("Length:     ", len(resp.text))
+        print("Snippet ↓↓↓")
+        print(resp.text[:1000])  # shows the first 1000 characters of the HTML
+        soup = BeautifulSoup(resp.text, "html.parser")
+
+        target = soup.select_one(CSS_SELECTOR)
+        if not target:
+            return {"url": url, "error": "CSS selector not found"}
+
+        raw = target.get("data-hydration-props")
+        if not raw:
+            return {"url": url, "error": "data-hydration-props not found"}
+
+        decoded   = html.unescape(raw)  # the attribute value is HTML-escaped JSON
+        full_json = json.loads(decoded)
+
+        result = {"url": url}
+        for block in BLOCKS:
+            result.update(flatten_block(block, full_json.get(block, {})))
+
+        kf_json  = _parse_json_value(result.get("keyFacts.keyFacts"))
+        dim_json = _parse_json_value(result.get("productInformationSection.dimensionProps"))
+        det_json = _parse_json_value(result.get("productInformationSection.productDetailsProps"))
+
+        result["keyFacts.keyFacts_formatted"] = format_keyfacts(kf_json)
+        result["productInformationSection.dimensionProps_formatted"] = format_dimensions(dim_json, with_html=False, translated=False)
+        html_trans = format_dimensions(dim_json, with_html=True, translated=True)
+
+        # NEW: extra safety net — re-add the leading '<' if it went missing
+        if isinstance(html_trans, str) and html_trans.startswith("strong>"):
+            html_trans = "<" + html_trans
+
+        result["productInformationSection.dimensionProps_formatted_html_translated"] = html_trans
+
+        total_kg = _collect_packaging_total_kg((dim_json or {}).get("packaging") or {})
+        result["total brutto"] = _fmt_float(total_kg)
+
+        summary_desc = result.get("productSummary.description", "") or ""
+        result["productInformationSection.productDetailsProps_formatted"] = format_product_details(det_json, add_summary_desc=summary_desc, with_html=False, skip_assembly=True)
+        result["productInformationSection.productDetailsProps_formatted_html"] = format_product_details(det_json, add_summary_desc=summary_desc, with_html=True,  skip_assembly=True)
+
+        desc  = result.get("pipPricePackage.productDescription", "") or ""
+        tname = result.get("stockcheckSection.typeName", "") or ""
+        meas  = result.get("pipPricePackage.measurementText", "") or ""
+        result["prductVariantColorMeasure"] = build_variant_color_measure(desc, tname, meas)
+
+        # breadcrumb: first ld+json script holding a BreadcrumbList, names joined with "/"
+        breadcrumb = None
+        for tag in soup.find_all("script", attrs={"type": lambda t: t and "ld+json" in t}):
+            try:
+                data = json.loads(tag.string)
+            except Exception:
+                continue
+            if isinstance(data, list):
+                data = next((d for d in data if isinstance(d, dict) and d.get("@type") == "BreadcrumbList"), None)
+            if isinstance(data, dict) and data.get("@type") == "BreadcrumbList":
+                items = data.get("itemListElement", [])
+                names = [it.get("name", "") for it in items]
+                breadcrumb = "/".join(names)
+                break
+        if breadcrumb:
+            result["categoryBreadcrumb"] = breadcrumb
+
+        # apply the whitelist (originalName is computed below)
+        filtered = {k: result.get(k) for k in KEEP_COLUMNS if k != "originalName"}
+
+        '''
+        ### NEW: originalName = productName + " " + typeName (без двойных пробелов)
+        '''  # (translation) NEW: originalName = productName + " " + typeName (no double spaces)
+        pn = (result.get("buyModule.productName") or "").strip()
+        tn = (result.get("stockcheckSection.typeName") or "").strip()
+        if pn and tn:
+            orig_name = f"{pn} {tn}"
+        else:
+            orig_name = pn or tn
+        filtered["originalName"] = orig_name
+
+        return filtered
+
+    except Exception as e:  # any failure is turned into an error row instead of aborting the run
+        print(e)
+        return {"url": url, "error": str(e)}
+
+# ───────────────────── VARIANT BUILDING / POST ────────────────────
+def _split_color_size(text: str):
+    '''Split "color, size" at the first comma; with no comma it is all size.'''
+    if not text:
+        return "", ""
+    head, sep, tail = text.partition(",")
+    # No comma found -> the whole text is the size and the color stays empty.
+    return (head.strip(), tail.strip()) if sep else ("", head.strip())
+
+def _ceil_price(v):  # v rounded up to an int; None when the conversion fails
+    try:
+        return int(math.ceil(float(v)))
+    except Exception:
+        return None
+
+def _ceil_int(v):  # same logic as _ceil_price: round up to an int, None when the conversion fails
+    try:
+        return int(math.ceil(float(v)))
+    except Exception:
+        return None
+
+def build_variant(row: dict) -> dict:
+    '''
+    Map one extracted row to the API item
+    {"category": {...}, "brand": {...}, "variant": {...}}.
+    Missing values fall back to "" ("none" for color, 0 for weight).
+    '''
+    def field(key):
+        # Row value with None/empty normalised to "".
+        return row.get(key) or ""
+
+    category_name = field("categoryBreadcrumb")
+    # The SKU is the visible article number with its spaces removed.
+    sku = field("productSummary.visibleItemNo").replace(" ", "")
+
+    # "prductVariantColorMeasure" (sic) holds "<color>, <size>" or only a size.
+    color, size = _split_color_size(field("prductVariantColorMeasure").strip())
+    if not (color or size):
+        size = field("pipPricePackage.measurementText").strip()
+
+    cost = _ceil_price(row.get("buyModule.productPrice"))
+    url = field("url")
+    # originalName column first, plain productName as fallback.
+    name = row.get("originalName") or field("buyModule.productName")
+    description_html = field("productInformationSection.productDetailsProps_formatted_html")
+    # originalComposition carries the translated HTML rendering of the dimensions.
+    composition_html = field("productInformationSection.dimensionProps_formatted_html_translated")
+
+    raw_imgs = field("productGallery.urls")
+    if isinstance(raw_imgs, str):
+        imgs = [u for u in raw_imgs.split("\n") if u.strip()]
+    else:
+        imgs = []
+
+    # Either availability flag being truthy marks the item as in stock.
+    in_stock = bool(row.get("availabilityGroup.serverOnlineSellable")) or bool(
+        row.get("buyModule.onlineSellable")
+    )
+    weight = _ceil_int(row.get("total brutto"))
+
+    variant = {
+        "status_id": 1,
+        "color": color.capitalize() if color else "none",
+        "sku": sku,
+        "size": size,
+        "cost": cost,
+        "originalUrl": url,
+        "originalName": name,
+        "originalDescription": description_html,
+        "originalComposition": composition_html,
+        "images": imgs,
+        "inStock": in_stock,
+        "weight": 0 if weight is None else weight,
+    }
+
+    return {
+        "category": {"name": category_name},
+        "brand":    {"name": "ikea"},
+        "variant":  variant,
+    }
+
+def post_payload(payload: dict) -> dict:
+    '''POST payload as UTF-8 JSON to POST_URL; returns {"ok", "status", "response" or "error"}.'''
+    headers = {"Content-Type": "application/json"}
+    logged = dict(headers)  # copy written to the log; the token is masked below
+    if POST_API_KEY:
+        headers["Authorization"] = f"Bearer {POST_API_KEY}"
+        logged["Authorization"] = "Bearer ***"  # never write the secret to disk
+    body = json.dumps(payload, ensure_ascii=False)
+    _post_log(f"→ POST {POST_URL}\nHeaders: {logged}\nBody: {body}")
+    try:
+        r = requests.post(POST_URL, headers=headers, data=body.encode("utf-8"), timeout=POST_TIMEOUT)
+        text = r.text
+        _post_log(f"← {r.status_code}\n{text}\n{'-'*60}")
+        return {"ok": 200 <= r.status_code < 300, "status": r.status_code, "response": text}
+    except Exception as e:
+        _post_log(f"× ERROR: {e}\n{'-'*60}")
+        return {"ok": False, "status": None, "error": str(e)}
+
+# ───────────────────────── MAIN SCRIPT FLOW ───────────────────────
+def safe_cell(val):  # cell-ready value: JSON text for dict/list, "" for None, else unchanged
+    if val is None:
+        return ""
+    return json.dumps(val, ensure_ascii=False) if isinstance(val, (dict, list)) else val
+
+def main():  # interactive entry point: every link -> Excel row; rows passing the filters -> JSON batches / API
+    SAVE_JSON = ask_bool("SAVE_JSON (сохранять JSON на диск?)", "1")
+    SEND_JSON = ask_bool("SEND_JSON (отправлять на API?)", "1")
+
+    # read the links (blank lines are skipped)
+    with open(INPUT_FILE, "r", encoding="utf-8") as f:
+        links = [line.strip() for line in f if line.strip()]
+    print(f"Всего ссылок: {len(links)}")
+
+    # prepare the Excel workbook; the header row is KEEP_COLUMNS
+    wb = Workbook()
+    ws = wb.active
+    ws.title = "IKEA Products"
+    ws.append(KEEP_COLUMNS)
+
+    # batch accumulated for JSON/API
+    batch_items = []
+    batch_index = 1
+
+    def flush_batch():  # save and/or send the current batch, then start a new one
+        nonlocal batch_items, batch_index
+        if not batch_items:
+            return
+        payload = {"parserName": "ikea", "items": batch_items}
+        if SAVE_JSON:
+            _save_json_batch(payload, batch_index)
+        if SEND_JSON:
+            res = post_payload(payload)
+            ok = res.get("ok")
+            print(f"POST batch {batch_index}: {'OK' if ok else 'FAIL'} (status={res.get('status')})")
+        batch_index += 1
+        batch_items = []  # NOTE(review): cleared even when the POST failed — there is no retry
+
+    for idx, link in enumerate(links, 1):
+        print(f"[{idx}/{len(links)}] {link}")
+        row = extract_data(link)
+
+        '''
+        ### NEW: originalName уже сформирован в extract_data и попал в row
+        '''  # (translation) NEW: originalName is already built in extract_data and present in row
+
+        # every row is written to Excel (no filters)
+        ws.append([safe_cell(row.get(col, "")) for col in KEEP_COLUMNS])
+
+        # FILTERS for JSON/API
+        try:
+            price = float(row.get("buyModule.productPrice") or 0)
+        except Exception:
+            price = 0.0
+
+        try:
+            total_kg = float(row.get("total brutto") or 0)
+        except Exception:
+            total_kg = 0.0
+
+        details_json = row.get("productInformationSection.productDetailsProps") or {}
+
+        # 1) price filter: only 20..1500 passes (error rows have price 0 and stop here)
+        if not (20 <= price <= 1500):
+            pass
+        # 2) weight filter: anything above 30 is skipped
+        elif total_kg > 30:
+            pass
+        # 3) materials filter: skipped when an exclusion token matches
+        elif materials_match_exclusions(details_json, EXCLUSIONS):
+            pass
+        else:
+            # passed all filters -> add to the batch
+            try:
+                item = build_variant(row)
+                batch_items.append(item)
+            except Exception as e:
+                _post_log(f"× build_variant error for {link}: {e}")
+
+        # autosave the Excel file every 50 rows
+        if idx % 50 == 0:
+            wb.save(OUTPUT_FILE)
+            print(f"💾 autosave: {OUTPUT_FILE}")
+
+        # flush the batch once it reaches the size limit
+        if len(batch_items) >= BATCH_SIZE:
+            flush_batch()
+
+    # final step: write the Excel file and send/save the remaining batch
+    wb.save(OUTPUT_FILE)
+    print(f"\n✅ Excel готов: {OUTPUT_FILE}")
+
+    flush_batch()
+    print("🎯 Готово.")
+
+if __name__ == "__main__":  # run main() only when executed as a script
+    main()
+
+
diff --git a/Парсер_IKEA/main_win proxy.py b/Парсер_IKEA/main_win proxy.py
index c832d86..66defd7 100644
--- a/Парсер_IKEA/main_win proxy.py	
+++ b/Парсер_IKEA/main_win proxy.py	
@@ -50,7 +50,7 @@ PROXIES_WEB = {
 REQUEST_TIMEOUT = 20
 
 # ───────────────────────── НАСТРОЙКИ POST ─────────────────────────
-POST_URL     = os.getenv("IKEA_POST_URL", "http://localhost:3005/parser/data")
+POST_URL     = os.getenv("IKEA_POST_URL", "http://172.25.4.101:3005/parser/data")
 POST_API_KEY = os.getenv("IKEA_POST_API_KEY", "")
 POST_TIMEOUT = 20
 BATCH_SIZE   = 50