import * as pdfjsLib from 'pdfjs-dist/build/pdf.mjs';
import fs from 'fs';

// Default input/output locations. Either one can be overridden on the
// command line:  node <this-script> [pdfPath] [outPath]
const DEFAULT_PDF_PATH =
  'd:/Users/chenjun/kyj-yanglao-web-new/src/views/elderly/apply/check-in/颐年集团养老服务合同-2026年6月终版docx.pdf';
const DEFAULT_OUT_PATH = 'd:/Users/chenjun/kyj-yanglao-web-new/pdf_full_text.txt';

const pdfPath = process.argv[2] ?? DEFAULT_PDF_PATH;
const outPath = process.argv[3] ?? DEFAULT_OUT_PATH;

// Two consecutive text items whose rounded vertical positions differ by more
// than this amount are treated as belonging to different lines.
const LINE_BREAK_THRESHOLD = 5;

/**
 * Groups the text items of one page into lines of text.
 *
 * Items are processed in the order given. Each item's vertical position is
 * read from index 5 of its `transform` array (the vertical translation
 * component of the matrix) and rounded to one decimal place. A new line is
 * started whenever that position differs from the previous item's by more
 * than `threshold`. Item strings on the same line are concatenated without a
 * separator, and lines that contain only whitespace are dropped.
 *
 * @param {Array<{str?: string, transform?: number[]}>} items - Entries from
 *   `getTextContent().items`.
 * @param {number} [threshold=LINE_BREAK_THRESHOLD] - Minimum vertical distance
 *   that counts as a line break.
 * @returns {string[]} The reconstructed lines, in reading order.
 */
function groupItemsIntoLines(items, threshold = LINE_BREAK_THRESHOLD) {
  const lines = [];
  let previousY = null;
  let currentLine = '';

  const flush = () => {
    if (currentLine.trim()) {
      lines.push(currentLine);
    }
    currentLine = '';
  };

  for (const item of items) {
    // Entries without text or a transform (e.g. marked-content markers)
    // carry nothing to extract and would otherwise throw below.
    if (typeof item?.str !== 'string' || !Array.isArray(item.transform)) {
      continue;
    }

    const y = Math.round(item.transform[5] * 10) / 10;
    if (previousY !== null && Math.abs(y - previousY) > threshold) {
      flush();
    }
    currentLine += item.str;
    previousY = y;
  }
  flush();

  return lines;
}

/**
 * Reads the PDF at `pdfPath`, extracts the text of every page, and writes the
 * result to `outPath` as UTF-8. Each page is preceded by a
 * `===== PAGE n =====` header. Progress is logged to the console.
 *
 * @returns {Promise<void>} Resolves once the output file has been written.
 * @throws If the PDF cannot be read or parsed, or the output cannot be written.
 */
async function main() {
  const data = new Uint8Array(fs.readFileSync(pdfPath));
  const loadingTask = pdfjsLib.getDocument({ data });

  try {
    const pdf = await loadingTask.promise;
    console.log('Total pages:', pdf.numPages);

    const sections = [];
    // Pages are 1-indexed and fetched sequentially to keep the output ordered.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const content = await page.getTextContent();
      const pageText = groupItemsIntoLines(content.items).join('\n');

      sections.push(`\n===== PAGE ${pageNumber} =====\n${pageText}\n`);
      console.log(`Page ${pageNumber}: ${pageText.length} chars`);
    }

    fs.writeFileSync(outPath, sections.join(''), 'utf8');
    console.log('Saved to', outPath);
  } finally {
    // Release the document and its worker resources even when extraction fails.
    await loadingTask.destroy();
  }
}

main().catch((err) => {
  console.error('ERROR:', err);
  process.exit(1);
});