xiongxing
/
kyj-yanglao-web-new


			
				
					
						
						
							123456789101112131415161718192021222324
							import sys
import os

# Input: absolute Windows path of the PDF that is opened with pdfplumber below.
# Raw string literals are used so the backslashes are not treated as escapes.
pdf_path = r'd:\Users\chenjun\kyj-yanglao-web-new\src\views\elderly\apply\check-in\颐年集团养老服务合同-2026年6月终版docx.pdf'
# Output: text file (written as UTF-8) that receives the extracted text of all pages.
out_path = r'd:\Users\chenjun\kyj-yanglao-web-new\pdf_full_text.txt'

# pdfplumber is a third-party package. If it is missing, install it for the
# current user with pip and retry the import.
try:
    import pdfplumber
except ImportError:
    import importlib
    import site
    import subprocess

    # Invoke pip through the running interpreter so the package is installed
    # for the same Python that executes this script.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pdfplumber', '--user'])

    # The user site-packages directory is only placed on sys.path at startup
    # when it already exists, so a first-ever --user install can be invisible
    # to this process. Add it explicitly before retrying.
    user_site = site.getusersitepackages()
    if user_site not in sys.path:
        site.addsitedir(user_site)
    # Discard cached finder state so the freshly installed files are seen.
    importlib.invalidate_caches()

    import pdfplumber

# Extract the text of every page of pdf_path and save it to out_path, with a
# "===== PAGE n =====" marker in front of each page's text.
with pdfplumber.open(pdf_path) as pdf:
    print(f'Total pages: {len(pdf.pages)}')
    # Collect the per-page sections in a list and join once at the end,
    # instead of growing a single string with += on every page.
    sections = []
    for page_no, page in enumerate(pdf.pages, start=1):
        # `or ''` guards against a falsy result (e.g. a page that yields no
        # extractable text) so len() and formatting below always get a str.
        text = page.extract_text() or ''
        sections.append(f'\n===== PAGE {page_no} =====\n{text}\n')
        print(f'Page {page_no}: {len(text)} chars')

# Kept as a module-level name, as in the original script.
all_text = ''.join(sections)

# All pages have been read, so the PDF handle is already closed here.
# Write explicitly as UTF-8 so non-ASCII text is saved regardless of the
# platform's default encoding.
with open(out_path, 'w', encoding='utf-8') as f:
    f.write(all_text)
print(f'Saved to {out_path}')