"""Dump the text of every page of the contract PDF to stdout and to a JSON file.

The script opens a fixed PDF with pdfplumber, extracts the text of each page,
echoes it to the console under a per-page header, and finally writes the list
of ``{"page": <1-based number>, "text": <page text>}`` records to a JSON file.
"""
import json
import os
import sys

import pdfplumber

# Input PDF; raw string so the Windows backslashes are taken literally.
pdf_path = r"d:\Users\chenjun\kyj-yanglao-web-new\src\views\elderly\apply\check-in\颐年集团养老服务合同-2026年6月终版docx.pdf"

with pdfplumber.open(pdf_path) as pdf:
    pages = []
    # Page numbers are reported 1-based, both on the console and in the JSON.
    for page_number, page in enumerate(pdf.pages, start=1):
        # Fall back to an empty string when extraction yields nothing, so
        # len() and the JSON record stay well-defined for such pages.
        text = page.extract_text() or ""
        pages.append({"page": page_number, "text": text})
        print("=== PAGE %d (chars: %d) ===" % (page_number, len(text)))
        print(text)
        print()

# Destination of the per-page JSON dump.
out_path = r"d:\Users\chenjun\kyj-yanglao-web-new\contract_pages.json"

# ensure_ascii=False keeps non-ASCII text readable in the file instead of
# \uXXXX escapes; the file is therefore written explicitly as UTF-8.
with open(out_path, "w", encoding="utf-8") as f:
    json.dump(pages, f, ensure_ascii=False, indent=2)

print("Saved to", out_path)