diff --git a/Parsing ZARAHOME/src/categories.xlsx b/Parsing ZARAHOME/src/categories.xlsx index 9634944..b883081 100644 Binary files a/Parsing ZARAHOME/src/categories.xlsx and b/Parsing ZARAHOME/src/categories.xlsx differ diff --git a/Parsing ZARAHOME/src/xlsx_recorder.py b/Parsing ZARAHOME/src/xlsx_recorder.py index 643a24c..977cd84 100644 --- a/Parsing ZARAHOME/src/xlsx_recorder.py +++ b/Parsing ZARAHOME/src/xlsx_recorder.py @@ -20,7 +20,7 @@ SEND_JSON = ask("SEND_JSON") # отправка POST SAVE_JSON = ask("SAVE_JSON") # сохранять копию JSON # ─────────────────────── прочие настройки ───────────────────────── -POST_URL = "http://localhost:3005/parser/data" +POST_URL = "http://172.25.4.101:3005/parser/data" MIN_PRICE = 40 MAX_PRICE = 1200 EXCLUSION_FILE = Path(__file__).with_name("exclusion_materials.txt") # в том же каталоге diff --git a/Processing/9_01_Конвертация_xlsx-to-vcf.py b/Processing/9_01_Конвертация_xlsx-to-vcf.py new file mode 100644 index 0000000..ab5469c --- /dev/null +++ b/Processing/9_01_Конвертация_xlsx-to-vcf.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import re +from pathlib import Path +import pandas as pd + +# ── НАСТРОЙКИ (поменяйте под себя) ──────────────────────────────────────────── +BASE_DIR = Path("/Users/valis/Downloads/") # папка, где лежат файлы +INPUT_XLSX_NAME = "Книга3.xlsx" # имя исходного XLSX +SHEET_NAME = "orders" # имя листа или None (первый лист) +OUTPUT_VCF_NAME = "contacts.vcf" # общее vcf с ВСЕМИ контактами +# ─────────────────────────────────────────────────────────────────────────────── + +# распознаваемые заголовки столбцов +FIRST_NAMES = {"first_name", "firstname", "имя", "name", "givenname", "given_name"} +LAST_NAMES = {"last_name", "lastname", "фамилия", "surname", "familyname", "family_name"} +PHONES = {"phone2", "телефон", "mobile", "mobile_phone", "тел"} +EMAILS = {"email", "e-mail", "почта", "mail"} +ORGS = {"org", "company", "компания", "организация"} +TITLES = {"title", "должность"} +NOTES = {"note", "notes", "заметки", "примечание"} + +PHONE_SPLIT_REGEX = re.compile(r"[;\n\r\t,\/\|]+") # разделители телефонов +ONLY_PLUS_DIGITS = re.compile(r"[^\d+]") +INVALID_FN_CHARS = re.compile(r"[^A-Za-z0-9А-Яа-яЁё _\-\.\(\)]") # для имён файлов + +def safe_filename(s: str, maxlen: int = 80) -> str: + s = INVALID_FN_CHARS.sub("_", s).strip() + s = re.sub(r"\s+", " ", s) + return (s[:maxlen] or "contact").strip() + +def pick_col(df, candidates): + """Найти первую подходящую колонку из набора имён (без регистра/пробелов).""" + norm = {str(c).lower().strip(): c for c in df.columns} + for want in candidates: + if want in norm: + return norm[want] + # мягкий матч: без пробелов + for k, orig in norm.items(): + if k.replace(" ", "") in candidates: + return orig + return None + +def normalize_phone(s: str) -> str: + s = str(s or "").strip() + if not s: + return "" + s = ONLY_PLUS_DIGITS.sub("", s) + if s.startswith("00"): + s = "+" + s[2:] + return s + +def split_phones(cell) -> list[str]: + raw = str(cell or "").strip() + if not raw: + return [] + parts = [p.strip() for p in PHONE_SPLIT_REGEX.split(raw) if p.strip()] + out, seen = [], set() + for p in parts: + n = normalize_phone(p) + if n and n not in seen: + seen.add(n) + out.append(n) + return out + +def build_vcard_row(row, col_first, col_last, col_phone, col_email, col_org, col_title, col_note) -> tuple[str|None, str]: + """Вернёт (vcard_text_or_None, display_name_for_filename).""" + first = str(row.get(col_first, "")).strip() if col_first else "" + last = str(row.get(col_last, "")).strip() if col_last else "" + full = (f"{first} {last}".strip() or first or last).strip() + phones = split_phones(row.get(col_phone, "")) if col_phone else [] + email = str(row.get(col_email, "")).strip() if col_email else "" + org = str(row.get(col_org, "")).strip() if col_org else "" + title = str(row.get(col_title, "")).strip() if col_title else "" + note = str(row.get(col_note, "")).strip().replace("\r", "") if col_note else "" + + if not (full or phones or email or org or title or note): + return None, "" + + lines = ["BEGIN:VCARD", "VERSION:3.0"] + if full: + ln, fn = (last or ""), (first or "") + lines.append(f"N:{ln};{fn};;;") + lines.append(f"FN:{full}") + if org: + lines.append(f"ORG:{org}") + if title: + lines.append(f"TITLE:{title}") + for p in phones: + lines.append(f"TEL;TYPE=CELL:{p}") + if email: + lines.append(f"EMAIL:{email}") + if note: + lines.append(f"NOTE:{note}") + lines.append("END:VCARD") + return "\n".join(lines) + "\n", (full or (phones[0] if phones else "contact")) + +def main(): + xlsx_path = BASE_DIR / INPUT_XLSX_NAME + out_vcf_path = BASE_DIR / OUTPUT_VCF_NAME + + if not xlsx_path.exists(): + print(f"Файл не найден: {xlsx_path}") + return + + try: + df = pd.read_excel(xlsx_path, sheet_name=SHEET_NAME, dtype=str, engine="openpyxl") + except Exception as e: + print(f"Ошибка чтения XLSX: {e}") + return + + df = df.fillna("").copy() + if df.empty: + print("В XLSX нет данных.") + return + + # подрежем полностью пустые колонки + non_empty_cols = [c for c in df.columns if not df[c].astype(str).str.strip().eq("").all()] + df = df[non_empty_cols] + if df.empty: + print("Все колонки пустые.") + return + + # нормализуем заголовки (только для поиска, сами значения не трогаем) + df.columns = [str(c).strip() for c in df.columns] + + col_first = pick_col(df, FIRST_NAMES) + col_last = pick_col(df, LAST_NAMES) + col_phone = pick_col(df, PHONES) + col_email = pick_col(df, EMAILS) + col_org = pick_col(df, ORGS) + col_title = pick_col(df, TITLES) + col_note = pick_col(df, NOTES) + + if not col_phone and not col_email: + print("Не найдены колонки с телефонами/почтой. Ожидались: phone/телефон или email/почта (любая).") + return + + # папка для индивидуальных карточек: // + per_contact_dir = BASE_DIR / xlsx_path.stem + per_contact_dir.mkdir(parents=True, exist_ok=True) + + combined = [] + count = 0 + + for idx, row in df.iterrows(): + vcard, disp = build_vcard_row(row, col_first, col_last, col_phone, col_email, col_org, col_title, col_note) + if not vcard: + continue + combined.append(vcard) + count += 1 + + # имя файла контакта: 0001_Имя Фамилия.vcf + safe_disp = safe_filename(disp) + file_name = f"{count:04d}_{safe_disp}.vcf" + (per_contact_dir / file_name).write_text(vcard, encoding="utf-8") + + if not combined: + print("Нечего экспортировать (все строки пустые).") + return + + out_vcf_path.write_text("".join(combined), encoding="utf-8") + + print("ГОТОВО ✅") + print(f"- Общее VCF: {out_vcf_path}") + print(f"- Индивидуальные карточки: {per_contact_dir} (всего {count})") + +if __name__ == "__main__": + main() diff --git a/Парсер_IKEA/main_win proxy.py b/Парсер_IKEA/main_win proxy.py index 85485d2..c832d86 100644 --- a/Парсер_IKEA/main_win proxy.py +++ b/Парсер_IKEA/main_win proxy.py @@ -689,7 +689,7 @@ def main(): details_json = row.get("productInformationSection.productDetailsProps") or {} - if not (20 <= price <= 1500): + if not (20 <= price <= 2000): pass elif total_kg > 30: pass diff --git a/Парсер_IKEA/~$leaf_categories.xlsx b/Парсер_IKEA/~$leaf_categories.xlsx deleted file mode 100644 index 0f76e2b..0000000 Binary files a/Парсер_IKEA/~$leaf_categories.xlsx and /dev/null differ