"""Dump the text of every page of a contract PDF to the console and to JSON.

Usage:
    python <this_script> [pdf_path] [out_path]

Both arguments are optional. When omitted, the script falls back to the
default input PDF and output JSON paths defined below.
"""
import json
import os
import sys

import pdfplumber

# Default input PDF and output JSON locations (used when no CLI args are given).
DEFAULT_PDF_PATH = r'd:\Users\chenjun\kyj-yanglao-web-new\src\views\elderly\apply\check-in\颐年集团养老服务合同-2026年6月终版docx.pdf'
DEFAULT_OUT_PATH = r'd:\Users\chenjun\kyj-yanglao-web-new\contract_pages.json'

# Maximum number of characters of each page echoed to the console.
PREVIEW_CHARS = 2000


def _print_preview(page_no, text):
    """Print a header, the first PREVIEW_CHARS characters of *text*, and a blank line."""
    print(f'=== PAGE {page_no} (chars: {len(text)}) ===')
    print(text[:PREVIEW_CHARS])
    if len(text) > PREVIEW_CHARS:
        print('...(truncated)')
    print()


def extract_pages(pdf_path):
    """Extract the text of each page of the PDF at *pdf_path*.

    A preview of every page is printed as it is processed.

    Args:
        pdf_path: Path of the PDF file to open with pdfplumber.

    Returns:
        A list of ``{'page': <1-based page number>, 'text': <page text>}``
        dicts, one per page, in document order.
    """
    pages = []
    with pdfplumber.open(pdf_path) as pdf:
        for page_no, page in enumerate(pdf.pages, start=1):
            # Normalise a falsy extraction result (e.g. None) to an empty
            # string so len() and slicing below are always safe.
            text = page.extract_text() or ''
            pages.append({'page': page_no, 'text': text})
            _print_preview(page_no, text)
    return pages


def main(argv=None):
    """Extract all pages and save them as a JSON array.

    Args:
        argv: Optional list of command-line arguments (without the program
            name): ``[pdf_path, out_path]``. Defaults to ``sys.argv[1:]``.
            Missing entries fall back to the module-level default paths.
    """
    if argv is None:
        argv = sys.argv[1:]
    pdf_path = argv[0] if len(argv) > 0 else DEFAULT_PDF_PATH
    out_path = argv[1] if len(argv) > 1 else DEFAULT_OUT_PATH

    pages = extract_pages(pdf_path)

    # ensure_ascii=False keeps non-ASCII text readable in the output file,
    # which is why the file is opened explicitly as UTF-8.
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(pages, f, ensure_ascii=False, indent=2)
    print(f'\nTotal: {len(pages)} pages. Saved to {out_path}')


if __name__ == '__main__':
    main()