xiongxing
/
kyj-yanglao-web-new


			
				
					
						
						
							1234567891011121314151617
							import pdfplumber
import json, os, sys

# Input: the contract PDF to read.  Output: a JSON file with one record per page.
pdf_path = r"d:\Users\chenjun\kyj-yanglao-web-new\src\views\elderly\apply\check-in\颐年集团养老服务合同-2026年6月终版docx.pdf"
out_path = r"d:\Users\chenjun\kyj-yanglao-web-new\contract_pages.json"

with pdfplumber.open(pdf_path) as pdf:
    # Each entry is {"page": <1-based page number>, "text": <extracted text>}.
    pages = []
    for page_no, pdf_page in enumerate(pdf.pages, start=1):
        # Substitute an empty string when extract_text() returns a falsy value,
        # so len() and the JSON dump below always receive a str.
        page_text = pdf_page.extract_text() or ""
        record = {"page": page_no, "text": page_text}
        pages.append(record)

        # Echo the page to stdout: banner with character count, body, blank line.
        banner = "=== PAGE %d (chars: %d) ===" % (page_no, len(page_text))
        print(banner)
        print(page_text)
        print()

    # Written while the PDF is still open; ensure_ascii=False keeps non-ASCII
    # characters readable in the file instead of \uXXXX escapes.
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(pages, f, ensure_ascii=False, indent=2)
    print("Saved to", out_path)