# File viewer metadata: 131 lines, 4.2 KiB, Python
import os
|
||
import sys
|
||
import traceback
|
||
import pandas as pd
|
||
from datetime import datetime
|
||
|
||
# === Helpers ===
|
||
def get_script_dir() -> str:
    """Return the absolute directory that contains this script.

    When ``__file__`` is not defined (for example in an interactive
    session), the current working directory is returned instead.
    """
    try:
        script_path = __file__
    except NameError:
        # No backing file for this module: fall back to the working directory.
        return os.getcwd()
    return os.path.dirname(os.path.abspath(script_path))
|
||
|
||
def log(msg: str):
    """Print *msg* with a timestamp and append the same line to the log file.

    The line has the form ``[YYYY-MM-DD HH:MM:SS] msg``. The log file path is
    read from the module-level ``log_file`` at call time. Writing to the file
    is best-effort: any failure there is ignored so that logging can never
    interrupt the caller; the console output has already happened by then.
    """
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    entry = f'[{stamp}] {msg}'
    print(entry)
    try:
        with open(log_file, 'a', encoding='utf-8') as fh:
            print(entry, file=fh)
    except Exception:
        # Deliberately swallowed: the file log is optional.
        pass
|
||
|
||
def is_temp_or_hidden(name: str) -> bool:
    """Return True if *name* starts with ``~$`` or with a dot."""
    return name.startswith(('~$', '.'))
|
||
|
||
# === Paths ===
|
||
# All paths are resolved relative to the directory holding this script.
script_dir = get_script_dir()
folder_path = os.path.join(script_dir, 'Files-todo')

# The log file lives in the same folder as the workbooks.
log_file = os.path.join(folder_path, 'merge_log.txt')

# Minute-resolution stamp, computed once when the module is loaded, so the
# output name is fixed for the whole run.
timestamp = f'{datetime.now():%Y%m%d-%H%M}'
output_filename = f'All-todo-{timestamp}.xlsx'
output_file = os.path.join(folder_path, output_filename)

# Prefix joined with a source file's stem to build its SourceFileUrl value.
BASE_URL = "https://www.zarahome.com/pl/en/"
|
||
|
||
def _list_input_files(folder: str) -> list:
    """Return the sorted full paths of the .xlsx files in *folder* to merge."""
    found = []
    for name in sorted(os.listdir(folder)):
        if is_temp_or_hidden(name):
            continue
        if not name.lower().endswith('.xlsx'):
            continue
        # Merged results are written into the same folder under this prefix;
        # skip them so earlier outputs are never merged back in.
        if name.startswith('All-todo-'):
            continue
        full = os.path.join(folder, name)
        if os.path.isfile(full):
            found.append(full)
    return found


def _order_columns(columns, leading: list) -> list:
    """Return the output column order for the merged sheet.

    Order: *leading* as given, then every other non-service column sorted,
    then ``SourceFile`` and ``SourceFileUrl`` (unless *leading* already
    contains them).
    """
    service = ('SourceFile', 'SourceFileUrl')
    extra = [c for c in columns if c not in leading and c not in service]
    try:
        extra = sorted(extra)
    except TypeError:
        # Column labels of different types (e.g. text and numbers) cannot be
        # compared with each other; order them by their string form instead
        # of aborting the merge.
        extra = sorted(extra, key=str)

    ordered = list(leading) + extra
    for col in service:
        if col not in ordered:
            ordered.append(col)
    return ordered


def main():
    """Merge every input .xlsx file in ``folder_path`` into one workbook.

    Each non-empty input file is read with pandas, tagged with a
    ``SourceFile`` column (the file name) and a ``SourceFileUrl`` column
    (``BASE_URL`` + file name without extension), and concatenated. The
    result is written to ``output_file`` on a sheet named ``Sheet``.

    A file that fails to load is logged with its traceback and counted as an
    error; the remaining files are still processed. A failure while saving is
    logged as well and does not raise.

    Raises:
        SystemExit: with status 1 when ``folder_path`` is not a directory.
    """
    log('=== Старт объединения Excel-файлов ===')
    log(f'Папка: {folder_path}')

    if not os.path.isdir(folder_path):
        log(f'ОШИБКА: Папка не найдена: {folder_path}')
        sys.exit(1)

    files = _list_input_files(folder_path)
    if not files:
        log('Нет входных .xlsx файлов для слияния. Завершение.')
        return

    total = len(files)
    log(f'Найдено файлов: {total}')

    dfs = []
    all_columns = []  # column order of the first non-empty file
    processed = 0
    errors = 0

    for i, path in enumerate(files, start=1):
        fname = os.path.basename(path)
        try:
            df = pd.read_excel(path, engine='openpyxl')
            if df.empty:
                log(f'ПРЕДУПРЕЖДЕНИЕ: Пустой файл — пропуск: {fname}')
                continue

            if not all_columns:
                # Captured before the service columns are added, so those
                # end up at the end of the output.
                all_columns = list(dict.fromkeys(df.columns))

            # Record where each row came from.
            stem, _ = os.path.splitext(fname)
            df['SourceFile'] = fname
            df['SourceFileUrl'] = BASE_URL + stem

            dfs.append(df)
            processed += 1
            log(f'[{i}/{total}] OK: {fname} — строк: {len(df)}')
        except Exception as e:
            # Per-file boundary: one unreadable workbook must not stop the
            # rest of the merge.
            errors += 1
            log(f'[{i}/{total}] ОШИБКА: {fname}: {e}')
            log(traceback.format_exc())

    if not dfs:
        log('Все файлы пустые или не прочитаны — нечего сохранять.')
        return

    combined = pd.concat(dfs, ignore_index=True)
    combined = combined.reindex(
        columns=_order_columns(combined.columns, all_columns)
    )

    try:
        with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
            combined.to_excel(writer, index=False, sheet_name='Sheet')
        log(f'Готово: {output_file}')
        log(f'Итоговых строк: {len(combined)}')
        log(f'Успешно обработано файлов: {processed}, ошибок: {errors}')
    except Exception as e:
        log(f'ОШИБКА сохранения {output_file}: {e}')
        log(traceback.format_exc())

    log('=== Готово ===')
|
||
|
||
# Run the merge only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|