"""Dump the text of every page of a PDF into a single UTF-8 text file.

The script opens ``pdf_path`` with pdfplumber, extracts the text of each
page, prints a short per-page progress report, and writes all pages --
each preceded by a ``===== PAGE n =====`` banner -- to ``out_path``.
"""
import sys
import os

pdf_path = r'd:\Users\chenjun\kyj-yanglao-web-new\src\views\elderly\apply\check-in\颐年集团养老服务合同-2026年6月终版docx.pdf'
out_path = r'd:\Users\chenjun\kyj-yanglao-web-new\pdf_full_text.txt'


def format_pages(page_texts):
    """Return the combined dump for an iterable of per-page text strings.

    Each page is rendered as a blank line, a ``===== PAGE n =====`` banner
    (pages are numbered from 1), the page text, and a trailing newline.
    An empty iterable yields an empty string.
    """
    # Join once instead of growing a string with ``+=`` inside the loop.
    return ''.join(
        f'\n===== PAGE {number} =====\n{text}\n'
        for number, text in enumerate(page_texts, start=1)
    )


def _load_pdfplumber():
    """Import pdfplumber, installing it with pip first if it is missing.

    Raises:
        subprocess.CalledProcessError: if the pip install command fails.
        ImportError: if pdfplumber still cannot be imported afterwards.
    """
    try:
        import pdfplumber
    except ImportError:
        import importlib
        import subprocess

        # Install with the interpreter that is running this script so the
        # package lands where this process will look for it.
        subprocess.check_call(
            [sys.executable, '-m', 'pip', 'install', 'pdfplumber', '--user']
        )
        # The package appeared after interpreter start-up; drop the import
        # system's cached directory listings before retrying.
        importlib.invalidate_caches()
        import pdfplumber
    return pdfplumber


def main():
    """Extract the text of ``pdf_path`` page by page and save it to ``out_path``."""
    pdfplumber = _load_pdfplumber()

    page_texts = []
    with pdfplumber.open(pdf_path) as pdf:
        print(f'Total pages: {len(pdf.pages)}')
        for number, page in enumerate(pdf.pages, start=1):
            # Fall back to an empty string when extract_text() yields
            # nothing, so len() and the formatting below always get a str.
            text = page.extract_text() or ''
            page_texts.append(text)
            print(f'Page {number}: {len(text)} chars')

    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(format_pages(page_texts))
    print(f'Saved to {out_path}')


if __name__ == '__main__':
    main()