MacOS_Parsers/Processing/9_01_Конвертация_xlsx-to-vcf.py

171 lines
6.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
from pathlib import Path
import pandas as pd
# ── НАСТРОЙКИ (поменяйте под себя) ────────────────────────────────────────────
BASE_DIR = Path("/Users/valis/Downloads/") # папка, где лежат файлы
INPUT_XLSX_NAME = "Книга3.xlsx" # имя исходного XLSX
SHEET_NAME = "orders" # имя листа или None (первый лист)
OUTPUT_VCF_NAME = "contacts.vcf" # общее vcf с ВСЕМИ контактами
# ───────────────────────────────────────────────────────────────────────────────
# распознаваемые заголовки столбцов
FIRST_NAMES = {"first_name", "firstname", "имя", "name", "givenname", "given_name"}
LAST_NAMES = {"last_name", "lastname", "фамилия", "surname", "familyname", "family_name"}
PHONES = {"phone2", "телефон", "mobile", "mobile_phone", "тел"}
EMAILS = {"email", "e-mail", "почта", "mail"}
ORGS = {"org", "company", "компания", "организация"}
TITLES = {"title", "должность"}
NOTES = {"note", "notes", "заметки", "примечание"}
PHONE_SPLIT_REGEX = re.compile(r"[;\n\r\t,\/\|]+") # разделители телефонов
ONLY_PLUS_DIGITS = re.compile(r"[^\d+]")
INVALID_FN_CHARS = re.compile(r"[^A-Za-z0-9А-Яа-яЁё _\-\.\(\)]") # для имён файлов
def safe_filename(s: str, maxlen: int = 80) -> str:
s = INVALID_FN_CHARS.sub("_", s).strip()
s = re.sub(r"\s+", " ", s)
return (s[:maxlen] or "contact").strip()
def pick_col(df, candidates):
"""Найти первую подходящую колонку из набора имён (без регистра/пробелов)."""
norm = {str(c).lower().strip(): c for c in df.columns}
for want in candidates:
if want in norm:
return norm[want]
# мягкий матч: без пробелов
for k, orig in norm.items():
if k.replace(" ", "") in candidates:
return orig
return None
def normalize_phone(s: str) -> str:
s = str(s or "").strip()
if not s:
return ""
s = ONLY_PLUS_DIGITS.sub("", s)
if s.startswith("00"):
s = "+" + s[2:]
return s
def split_phones(cell) -> list[str]:
raw = str(cell or "").strip()
if not raw:
return []
parts = [p.strip() for p in PHONE_SPLIT_REGEX.split(raw) if p.strip()]
out, seen = [], set()
for p in parts:
n = normalize_phone(p)
if n and n not in seen:
seen.add(n)
out.append(n)
return out
def build_vcard_row(row, col_first, col_last, col_phone, col_email, col_org, col_title, col_note) -> tuple[str|None, str]:
"""Вернёт (vcard_text_or_None, display_name_for_filename)."""
first = str(row.get(col_first, "")).strip() if col_first else ""
last = str(row.get(col_last, "")).strip() if col_last else ""
full = (f"{first} {last}".strip() or first or last).strip()
phones = split_phones(row.get(col_phone, "")) if col_phone else []
email = str(row.get(col_email, "")).strip() if col_email else ""
org = str(row.get(col_org, "")).strip() if col_org else ""
title = str(row.get(col_title, "")).strip() if col_title else ""
note = str(row.get(col_note, "")).strip().replace("\r", "") if col_note else ""
if not (full or phones or email or org or title or note):
return None, ""
lines = ["BEGIN:VCARD", "VERSION:3.0"]
if full:
ln, fn = (last or ""), (first or "")
lines.append(f"N:{ln};{fn};;;")
lines.append(f"FN:{full}")
if org:
lines.append(f"ORG:{org}")
if title:
lines.append(f"TITLE:{title}")
for p in phones:
lines.append(f"TEL;TYPE=CELL:{p}")
if email:
lines.append(f"EMAIL:{email}")
if note:
lines.append(f"NOTE:{note}")
lines.append("END:VCARD")
return "\n".join(lines) + "\n", (full or (phones[0] if phones else "contact"))
def main():
xlsx_path = BASE_DIR / INPUT_XLSX_NAME
out_vcf_path = BASE_DIR / OUTPUT_VCF_NAME
if not xlsx_path.exists():
print(f"Файл не найден: {xlsx_path}")
return
try:
df = pd.read_excel(xlsx_path, sheet_name=SHEET_NAME, dtype=str, engine="openpyxl")
except Exception as e:
print(f"Ошибка чтения XLSX: {e}")
return
df = df.fillna("").copy()
if df.empty:
print("В XLSX нет данных.")
return
# подрежем полностью пустые колонки
non_empty_cols = [c for c in df.columns if not df[c].astype(str).str.strip().eq("").all()]
df = df[non_empty_cols]
if df.empty:
print("Все колонки пустые.")
return
# нормализуем заголовки (только для поиска, сами значения не трогаем)
df.columns = [str(c).strip() for c in df.columns]
col_first = pick_col(df, FIRST_NAMES)
col_last = pick_col(df, LAST_NAMES)
col_phone = pick_col(df, PHONES)
col_email = pick_col(df, EMAILS)
col_org = pick_col(df, ORGS)
col_title = pick_col(df, TITLES)
col_note = pick_col(df, NOTES)
if not col_phone and not col_email:
print("Не найдены колонки с телефонами/почтой. Ожидались: phone/телефон или email/почта (любая).")
return
# папка для индивидуальных карточек: <BASE_DIR>/<stem_of_xlsx>/
per_contact_dir = BASE_DIR / xlsx_path.stem
per_contact_dir.mkdir(parents=True, exist_ok=True)
combined = []
count = 0
for idx, row in df.iterrows():
vcard, disp = build_vcard_row(row, col_first, col_last, col_phone, col_email, col_org, col_title, col_note)
if not vcard:
continue
combined.append(vcard)
count += 1
# имя файла контакта: 0001_Имя Фамилия.vcf
safe_disp = safe_filename(disp)
file_name = f"{count:04d}_{safe_disp}.vcf"
(per_contact_dir / file_name).write_text(vcard, encoding="utf-8")
if not combined:
print("Нечего экспортировать (все строки пустые).")
return
out_vcf_path.write_text("".join(combined), encoding="utf-8")
print("ГОТОВО ✅")
print(f"- Общее VCF: {out_vcf_path}")
print(f"- Индивидуальные карточки: {per_contact_dir} (всего {count})")
if __name__ == "__main__":
main()