Update server.js
This commit is contained in:
188
server.js
188
server.js
@@ -18,7 +18,16 @@ app.get("/health", (_req, res) => res.status(200).send("ok"));
|
|||||||
|
|
||||||
// ---------------- Request context / logging ----------------
|
// ---------------- Request context / logging ----------------
|
||||||
|
|
||||||
|
// Defaults can be overridden by env vars.
|
||||||
|
// - REQ_TIMEOUT_MS: general request timeout
|
||||||
|
// - REQ_TIMEOUT_PDF_MS: longer timeout for multi-page PDF zips
|
||||||
const DEFAULT_REQ_TIMEOUT_MS = clampInt(process.env.REQ_TIMEOUT_MS, 5_000, 10 * 60_000, 120_000);
|
const DEFAULT_REQ_TIMEOUT_MS = clampInt(process.env.REQ_TIMEOUT_MS, 5_000, 10 * 60_000, 120_000);
|
||||||
|
const DEFAULT_REQ_TIMEOUT_PDF_MS = clampInt(
|
||||||
|
process.env.REQ_TIMEOUT_PDF_MS,
|
||||||
|
10_000,
|
||||||
|
30 * 60_000,
|
||||||
|
5 * 60_000
|
||||||
|
);
|
||||||
|
|
||||||
app.use((req, res, next) => {
|
app.use((req, res, next) => {
|
||||||
const requestId = String(req.headers["x-request-id"] || "").trim() || randomUUID();
|
const requestId = String(req.headers["x-request-id"] || "").trim() || randomUUID();
|
||||||
@@ -109,7 +118,6 @@ async function assertSupportedRasterImage(input, req) {
|
|||||||
// If it’s HEIC/HEIF, sharp may fail metadata; allow it through to WASM path.
|
// If it’s HEIC/HEIF, sharp may fail metadata; allow it through to WASM path.
|
||||||
if (looksLikeHeic(input)) return;
|
if (looksLikeHeic(input)) return;
|
||||||
|
|
||||||
// Quick probe: if sharp can’t read metadata, treat it as unsupported (415).
|
|
||||||
try {
|
try {
|
||||||
await sharp(input, { failOnError: false }).metadata();
|
await sharp(input, { failOnError: false }).metadata();
|
||||||
} catch {
|
} catch {
|
||||||
@@ -261,6 +269,93 @@ async function pdfFirstPageToJpeg(inputBuffer, opts, dpi = 300) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Page-by-page PDF rendering:
|
||||||
|
* Uses pdftoppm -f N -l N -singlefile to render one page at a time,
|
||||||
|
* keeping /tmp usage bounded (one rendered JPEG at a time).
|
||||||
|
*
|
||||||
|
* Returns { produced: number }.
|
||||||
|
*/
|
||||||
|
async function pdfPagesToZipStreamPageByPage({
|
||||||
|
inputPdfBuffer,
|
||||||
|
res,
|
||||||
|
req,
|
||||||
|
opts,
|
||||||
|
dpi,
|
||||||
|
maxPages,
|
||||||
|
outNamePrefix = "page",
|
||||||
|
}) {
|
||||||
|
const requestId = req.requestId;
|
||||||
|
const jobId = randomUUID();
|
||||||
|
|
||||||
|
const pdfPath = `/tmp/${jobId}.pdf`;
|
||||||
|
const outPrefix = `/tmp/${jobId}-${outNamePrefix}`; // pdftoppm writes `${outPrefix}.jpg`
|
||||||
|
|
||||||
|
let produced = 0;
|
||||||
|
|
||||||
|
try {
|
||||||
|
await fs.writeFile(pdfPath, inputPdfBuffer);
|
||||||
|
|
||||||
|
for (let page = 1; page <= maxPages; page++) {
|
||||||
|
if (isAborted(req, res)) break;
|
||||||
|
|
||||||
|
// Render a single page to `${outPrefix}.jpg`
|
||||||
|
await execFilePromise("pdftoppm", [
|
||||||
|
"-jpeg",
|
||||||
|
"-r",
|
||||||
|
String(dpi),
|
||||||
|
"-f",
|
||||||
|
String(page),
|
||||||
|
"-l",
|
||||||
|
String(page),
|
||||||
|
"-singlefile",
|
||||||
|
pdfPath,
|
||||||
|
outPrefix,
|
||||||
|
]);
|
||||||
|
|
||||||
|
const renderedPath = `${outPrefix}.jpg`;
|
||||||
|
|
||||||
|
// If pdftoppm produced nothing (EOF), stop cleanly.
|
||||||
|
let pageBuf;
|
||||||
|
try {
|
||||||
|
pageBuf = await fs.readFile(renderedPath);
|
||||||
|
} catch (e) {
|
||||||
|
// If it didn't write the file, assume we're past the last page.
|
||||||
|
// (Could also be a failure; but pdftoppm errors should have thrown above.)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isAborted(req, res)) {
|
||||||
|
await safeUnlink(renderedPath);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
const jpegBuf = await toJpegWithSharp(pageBuf, opts);
|
||||||
|
await safeUnlink(renderedPath);
|
||||||
|
|
||||||
|
if (isAborted(req, res)) break;
|
||||||
|
|
||||||
|
const n = String(page).padStart(3, "0");
|
||||||
|
res.archive.append(jpegBuf, { name: `${n}.jpg` });
|
||||||
|
produced++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (produced === 0 && !isAborted(req, res)) {
|
||||||
|
// If we produced nothing, treat as PDF render failure.
|
||||||
|
throw new Error("PDF render produced no pages");
|
||||||
|
}
|
||||||
|
|
||||||
|
return { produced };
|
||||||
|
} catch (e) {
|
||||||
|
console.error(JSON.stringify({ requestId, err: String(e?.stack || e) }));
|
||||||
|
throw e;
|
||||||
|
} finally {
|
||||||
|
await safeUnlink(pdfPath);
|
||||||
|
// renderedPath cleaned each iteration; safe cleanup in case of partial failures:
|
||||||
|
await safeUnlink(`${outPrefix}.jpg`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------- Endpoints ----------------
|
// ---------------- Endpoints ----------------
|
||||||
|
|
||||||
// Single JPEG output (images + PDF first page)
|
// Single JPEG output (images + PDF first page)
|
||||||
@@ -309,33 +404,25 @@ app.post("/convert", async (req, res) => {
|
|||||||
throw sharpErr;
|
throw sharpErr;
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
// Expected 415 from probe
|
|
||||||
if (e?.statusCode === 415) {
|
if (e?.statusCode === 415) {
|
||||||
return sendError(res, 415, e.code || "unsupported_media_type", e.message, requestId);
|
return sendError(res, 415, e.code || "unsupported_media_type", e.message, requestId);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.error(JSON.stringify({ requestId, err: String(e?.stack || e) }));
|
console.error(JSON.stringify({ requestId, err: String(e?.stack || e) }));
|
||||||
|
|
||||||
// Don’t leak stack traces to clients
|
|
||||||
return sendError(res, 500, "conversion_failed", "Conversion failed", requestId);
|
return sendError(res, 500, "conversion_failed", "Conversion failed", requestId);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// PDF all pages -> ZIP of JPEG pages (supports resize/quality by re-encoding each page)
|
// PDF all pages -> ZIP of JPEG pages (page-by-page rendering to keep /tmp bounded)
|
||||||
app.post("/convert/pdf", async (req, res) => {
|
app.post("/convert/pdf", async (req, res) => {
|
||||||
const requestId = req.requestId;
|
const requestId = req.requestId;
|
||||||
|
|
||||||
// More time for multi-page zips
|
// More time for multi-page zips
|
||||||
req.setTimeout(clampInt(process.env.REQ_TIMEOUT_PDF_MS, 10_000, 30 * 60_000, 5 * 60_000));
|
req.setTimeout(DEFAULT_REQ_TIMEOUT_PDF_MS);
|
||||||
res.setTimeout(clampInt(process.env.REQ_TIMEOUT_PDF_MS, 10_000, 30 * 60_000, 5 * 60_000));
|
res.setTimeout(DEFAULT_REQ_TIMEOUT_PDF_MS);
|
||||||
|
|
||||||
let archive = null;
|
let archive = null;
|
||||||
|
|
||||||
const id = randomUUID();
|
|
||||||
const pdfPath = `/tmp/${id}.pdf`;
|
|
||||||
const outDir = `/tmp/${id}-pages`;
|
|
||||||
const outPrefix = path.join(outDir, "page");
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (!requireAuth(req, res)) return;
|
if (!requireAuth(req, res)) return;
|
||||||
if (isAborted(req, res)) return;
|
if (isAborted(req, res)) return;
|
||||||
@@ -353,35 +440,15 @@ app.post("/convert/pdf", async (req, res) => {
|
|||||||
const dpi = clampInt(req.headers["x-pdf-dpi"], 72, 600, 300);
|
const dpi = clampInt(req.headers["x-pdf-dpi"], 72, 600, 300);
|
||||||
const maxPages = clampInt(req.headers["x-pdf-max-pages"], 1, 200, 50);
|
const maxPages = clampInt(req.headers["x-pdf-max-pages"], 1, 200, 50);
|
||||||
|
|
||||||
await fs.mkdir(outDir, { recursive: true });
|
|
||||||
await fs.writeFile(pdfPath, input);
|
|
||||||
|
|
||||||
if (isAborted(req, res)) return;
|
|
||||||
|
|
||||||
// Render all pages to JPG via poppler
|
|
||||||
await execFilePromise("pdftoppm", ["-jpeg", "-r", String(dpi), pdfPath, outPrefix]);
|
|
||||||
|
|
||||||
const files = (await fs.readdir(outDir))
|
|
||||||
.filter((f) => /^page-\d+\.jpg$/i.test(f))
|
|
||||||
.sort((a, b) => pageNum(a) - pageNum(b));
|
|
||||||
|
|
||||||
if (files.length === 0) return sendError(res, 500, "pdf_render_failed", "PDF render produced no pages", requestId);
|
|
||||||
if (files.length > maxPages) {
|
|
||||||
return sendError(
|
|
||||||
res,
|
|
||||||
413,
|
|
||||||
"pdf_too_many_pages",
|
|
||||||
`PDF has ${files.length} pages; exceeds maxPages=${maxPages}`,
|
|
||||||
requestId
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
res.status(200);
|
res.status(200);
|
||||||
res.setHeader("Content-Type", "application/zip");
|
res.setHeader("Content-Type", "application/zip");
|
||||||
res.setHeader("Content-Disposition", `attachment; filename="pdf-pages-${id}.zip"`);
|
res.setHeader("Content-Disposition", `attachment; filename="pdf-pages-${randomUUID()}.zip"`);
|
||||||
|
|
||||||
archive = archiver("zip", { zlib: { level: 6 } });
|
archive = archiver("zip", { zlib: { level: 6 } });
|
||||||
|
|
||||||
|
// Attach archive to res for the helper
|
||||||
|
res.archive = archive;
|
||||||
|
|
||||||
// Abort work if client disconnects
|
// Abort work if client disconnects
|
||||||
res.on("close", () => {
|
res.on("close", () => {
|
||||||
try {
|
try {
|
||||||
@@ -397,28 +464,22 @@ app.post("/convert/pdf", async (req, res) => {
|
|||||||
archive.on("error", (err) => {
|
archive.on("error", (err) => {
|
||||||
console.error(JSON.stringify({ requestId, err: String(err?.stack || err) }));
|
console.error(JSON.stringify({ requestId, err: String(err?.stack || err) }));
|
||||||
try {
|
try {
|
||||||
if (!res.headersSent) sendError(res, 500, "zip_failed", "ZIP creation failed", requestId);
|
// If we already started streaming, we can only end.
|
||||||
else res.end();
|
res.end();
|
||||||
} catch {}
|
} catch {}
|
||||||
});
|
});
|
||||||
|
|
||||||
archive.pipe(res);
|
archive.pipe(res);
|
||||||
|
|
||||||
// Re-encode each rendered page with resize + quality controls, then append as buffers
|
// Render and append pages one-at-a-time
|
||||||
for (let i = 0; i < files.length; i++) {
|
await pdfPagesToZipStreamPageByPage({
|
||||||
if (isAborted(req, res)) break;
|
inputPdfBuffer: input,
|
||||||
|
res,
|
||||||
const f = files[i];
|
req,
|
||||||
const n = String(i + 1).padStart(3, "0");
|
opts,
|
||||||
const pagePath = path.join(outDir, f);
|
dpi,
|
||||||
|
maxPages,
|
||||||
const pageBuf = await fs.readFile(pagePath);
|
});
|
||||||
const jpegBuf = await toJpegWithSharp(pageBuf, opts);
|
|
||||||
|
|
||||||
if (isAborted(req, res)) break;
|
|
||||||
|
|
||||||
archive.append(jpegBuf, { name: `${n}.jpg` });
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!isAborted(req, res)) {
|
if (!isAborted(req, res)) {
|
||||||
await archive.finalize();
|
await archive.finalize();
|
||||||
@@ -434,17 +495,12 @@ app.post("/convert/pdf", async (req, res) => {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ENOENT (pdftoppm missing) or other exec failures should be clear, but still not leak stack.
|
|
||||||
const msg =
|
const msg =
|
||||||
String(e?.message || "").includes("Missing dependency: pdftoppm") ||
|
String(e?.message || "").includes("Missing dependency: pdftoppm") || String(e?.message || "").includes("ENOENT")
|
||||||
String(e?.message || "").includes("ENOENT")
|
|
||||||
? "Server missing PDF rendering dependency"
|
? "Server missing PDF rendering dependency"
|
||||||
: "Conversion failed";
|
: "Conversion failed";
|
||||||
|
|
||||||
return sendError(res, 500, "conversion_failed", msg, requestId);
|
return sendError(res, 500, "conversion_failed", msg, requestId);
|
||||||
} finally {
|
|
||||||
await safeUnlink(pdfPath);
|
|
||||||
await safeRmrf(outDir);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -467,23 +523,17 @@ function execFilePromise(cmd, args) {
|
|||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
execFile(cmd, args, (err, _stdout, stderr) => {
|
execFile(cmd, args, (err, _stdout, stderr) => {
|
||||||
if (err) {
|
if (err) {
|
||||||
// Provide clearer missing-binary errors
|
|
||||||
if (err.code === "ENOENT") {
|
if (err.code === "ENOENT") {
|
||||||
return reject(new Error(`Missing dependency: ${cmd} (not found in PATH)`));
|
return reject(new Error(`Missing dependency: ${cmd} (not found in PATH)`));
|
||||||
}
|
}
|
||||||
const meta = `cmd=${cmd} code=${err.code || "unknown"} signal=${err.signal || "none"}`;
|
const meta = `cmd=${cmd} code=${err.code || "unknown"} signal=${err.signal || "none"}`;
|
||||||
return reject(new Error(`${meta}${stderr ? `; stderr=${stderr}` : ""}`));
|
return reject(new Error(`${meta}${stderr ? `; stderr=${stderr}` : ""}`));
|
||||||
}
|
}
|
||||||
else resolve();
|
resolve();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function pageNum(filename) {
|
|
||||||
const m = filename.match(/page-(\d+)\.jpg/i);
|
|
||||||
return m ? Number(m[1]) : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
function clampInt(value, min, max, fallback) {
|
function clampInt(value, min, max, fallback) {
|
||||||
const n = Number(value);
|
const n = Number(value);
|
||||||
if (!Number.isFinite(n)) return fallback;
|
if (!Number.isFinite(n)) return fallback;
|
||||||
@@ -497,9 +547,3 @@ async function safeUnlink(p) {
|
|||||||
} catch {}
|
} catch {}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function safeRmrf(p) {
|
|
||||||
if (!p) return;
|
|
||||||
try {
|
|
||||||
await fs.rm(p, { recursive: true, force: true });
|
|
||||||
} catch {}
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user