Files
PostConvert/server.js
2026-01-22 08:20:01 -08:00

550 lines
16 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { execFile } from "child_process";
import { randomUUID, timingSafeEqual } from "crypto";
import fs from "fs/promises";
import path from "path";

import archiver from "archiver";
import express from "express";
import sharp from "sharp";
// libheif-js import can be default or module object depending on bundling
import libheifModule from "libheif-js";
const libheif = libheifModule?.default ?? libheifModule;
// Express application. The raw body parser accepts every content type and
// rejects bodies larger than 30mb.
const app = express();
const rawBodyParser = express.raw({ type: "*/*", limit: "30mb" });
app.use(rawBodyParser);

// Liveness probes: fixed plain-text 200 responses (these handlers never call requireAuth).
function respondText(text) {
  return (_req, res) => res.status(200).send(text);
}
app.get("/", respondText("postconvert: ok"));
app.get("/health", respondText("ok"));
// ---------------- Request context / logging ----------------
// Defaults can be overridden by env vars.
// - REQ_TIMEOUT_MS: general request timeout
// - REQ_TIMEOUT_PDF_MS: longer timeout for multi-page PDF zips
// Time units used to spell out the timeout bounds below.
const SECOND_MS = 1_000;
const MINUTE_MS = 60 * SECOND_MS;

// General request timeout: 5s..10min, default 2min.
const DEFAULT_REQ_TIMEOUT_MS = clampInt(
  process.env.REQ_TIMEOUT_MS,
  5 * SECOND_MS,
  10 * MINUTE_MS,
  2 * MINUTE_MS
);

// Timeout for the multi-page PDF endpoint: 10s..30min, default 5min.
const DEFAULT_REQ_TIMEOUT_PDF_MS = clampInt(
  process.env.REQ_TIMEOUT_PDF_MS,
  10 * SECOND_MS,
  30 * MINUTE_MS,
  5 * MINUTE_MS
);
// Request-context middleware: assigns a request id, applies the default
// timeout to both request and response, and logs one JSON line per finished response.
app.use((req, res, next) => {
  // Reuse the caller's x-request-id when it is non-blank; otherwise mint a UUID.
  const suppliedId = String(req.headers["x-request-id"] || "").trim();
  const requestId = suppliedId || randomUUID();
  req.requestId = requestId;
  res.setHeader("x-request-id", requestId);

  const started = Date.now();
  req.setTimeout(DEFAULT_REQ_TIMEOUT_MS);
  res.setTimeout(DEFAULT_REQ_TIMEOUT_MS);

  const logFinished = () => {
    const ms = Date.now() - started;
    // Prefer the declared Content-Length; fall back to the parsed body size.
    const declaredLength = Number(req.headers["content-length"] || 0);
    const bytesIn = declaredLength || (req.body?.length ?? 0) || 0;
    const entry = {
      requestId,
      method: req.method,
      path: req.originalUrl,
      status: res.statusCode,
      contentType: req.headers["content-type"] || null,
      bytesIn,
      ms,
    };
    console.log(JSON.stringify(entry));
  };
  res.on("finish", logFinished);

  next();
});
/**
 * Reports whether work for this request should stop: the request was aborted,
 * or the response has already been ended or destroyed.
 * @param {object} req - request; reads `aborted`.
 * @param {object} res - response; reads `writableEnded` and `destroyed`.
 * @returns {boolean}
 */
function isAborted(req, res) {
  if (req.aborted) return true;
  if (res.writableEnded) return true;
  return Boolean(res.destroyed);
}
/**
 * Sends a JSON error body `{ error, message, requestId }` with the given status.
 * If headers were already sent, the response is only ended (best effort).
 * @param {object} res - response object.
 * @param {number} status - HTTP status code.
 * @param {string} code - machine-readable error code.
 * @param {string} message - human-readable message.
 * @param {string} requestId - id echoed back to the caller.
 */
function sendError(res, status, code, message, requestId) {
  if (!res.headersSent) {
    res.status(status);
    res.setHeader("Content-Type", "application/json; charset=utf-8");
    const payload = JSON.stringify({ error: code, message, requestId });
    res.send(payload);
    return;
  }

  // Too late for a structured error; just terminate the stream.
  try {
    res.end();
  } catch {
    // Ending an already-broken response is best effort.
  }
}
// ---------------- Auth ----------------
/**
 * Checks the `Authorization: Bearer <CONVERTER_TOKEN>` header.
 * Sends a 401 JSON error and returns false when the token is not configured
 * or the header does not match; returns true otherwise.
 * @param {object} req - request; reads `headers.authorization` and `requestId`.
 * @param {object} res - response used for the 401 reply.
 * @returns {boolean} whether the caller is authorized.
 */
function requireAuth(req, res) {
  const token = process.env.CONVERTER_TOKEN;
  const presented = Buffer.from(String(req.headers.authorization || ""));
  const expected = Buffer.from(`Bearer ${token}`);

  // Constant-time comparison so response timing does not reveal how much of
  // the secret matched. timingSafeEqual requires equal lengths, hence the
  // length guard; an unset token never authorizes anyone.
  const authorized =
    Boolean(token) &&
    presented.length === expected.length &&
    timingSafeEqual(presented, expected);

  if (!authorized) {
    sendError(res, 401, "unauthorized", "Unauthorized", req.requestId);
    return false;
  }
  return true;
}
// ---------------- Type checks ----------------
/**
 * Treats a request as a PDF upload when its Content-Type starts with
 * `application/pdf` or its `x-filename` header ends with `.pdf`
 * (both compared case-insensitively).
 * @param {object} req - request; reads `headers`.
 * @returns {boolean}
 */
function isPdfRequest(req) {
  const lowerHeader = (name) => String(req.headers[name] || "").toLowerCase();
  if (lowerHeader("content-type").startsWith("application/pdf")) return true;
  return lowerHeader("x-filename").endsWith(".pdf");
}
/**
 * Sniffs an ISO-BMFF `ftyp` box and reports whether its major brand or any
 * compatible brand belongs to the HEIF family.
 *
 * Brands are read as aligned 4-byte codes and only inside the declared `ftyp`
 * box (capped at the first 256 bytes). This avoids false positives from
 * payload bytes after the box or from two adjacent brands that happen to
 * spell a HEIF brand across their boundary.
 *
 * @param {Buffer} buf - start of the uploaded file.
 * @returns {boolean} true when a HEIF-family brand is present.
 */
function looksLikeHeic(buf) {
  const HEIF_BRANDS = ["heic", "heif", "heix", "hevc", "hevx", "mif1", "msf1"];
  const FTYP_MIN_BYTES = 16; // size(4) + "ftyp"(4) + major brand(4) + minor version(4)
  const MAX_SCAN_BYTES = 256;

  if (!buf || buf.length < FTYP_MIN_BYTES) return false;
  if (buf.toString("ascii", 4, 8) !== "ftyp") return false;

  // Declared sizes 0 ("to end of file") and 1 (64-bit size follows) do not
  // give a usable bound, so fall back to the scan cap for those.
  const declaredSize = buf.readUInt32BE(0);
  const available = Math.min(buf.length, MAX_SCAN_BYTES);
  const boxEnd = declaredSize >= FTYP_MIN_BYTES ? Math.min(declaredSize, available) : available;

  const isHeifBrandAt = (offset) => HEIF_BRANDS.includes(buf.toString("latin1", offset, offset + 4));

  // Major brand sits at offset 8; offset 12..16 is the minor version (not a brand).
  if (isHeifBrandAt(8)) return true;
  for (let offset = 16; offset + 4 <= boxEnd; offset += 4) {
    if (isHeifBrandAt(offset)) return true;
  }
  return false;
}
/**
 * Resolves when the input is either HEIC/HEIF-looking or readable by sharp's
 * metadata probe; otherwise rejects with an Error carrying
 * `statusCode: 415` and `code: "unsupported_media_type"`.
 * @param {Buffer} input - uploaded bytes.
 * @param {object} req - request; its content-type is echoed in the error message.
 */
async function assertSupportedRasterImage(input, req) {
  // If it's HEIC/HEIF, sharp may fail metadata; allow it through to the WASM path.
  if (looksLikeHeic(input)) return;

  let decodable = true;
  try {
    await sharp(input, { failOnError: false }).metadata();
  } catch {
    decodable = false;
  }
  if (decodable) return;

  const ct = String(req.headers["content-type"] || "unknown");
  const failure = new Error(`Unsupported input (not a decodable image). content-type=${ct}`);
  failure.statusCode = 415;
  failure.code = "unsupported_media_type";
  throw failure;
}
// ---------------- Resize / Quality options ----------------
// Headers:
// - x-jpeg-quality: 0..100 (default 100)
// - x-max-dimension: px (max width/height), preserves aspect (default none)
// - x-width: px (optional)
// - x-height: px (optional)
// - x-fit: inside|cover|contain|fill|outside (default inside)
// - x-without-enlargement: true|false (default true)
//
// PDF headers:
// - x-pdf-dpi: 72..600 (default 300)
// - x-pdf-max-pages: 1..200 (default 50)
/**
 * Parses a loose boolean (1/true/yes/y/on or 0/false/no/n/off, any case,
 * surrounding whitespace ignored). Null/undefined and unrecognized values
 * yield `fallback`.
 * @param {*} v - raw value, typically a header string.
 * @param {boolean} [fallback=false] - result when `v` is missing or unrecognized.
 * @returns {boolean}
 */
function parseBool(v, fallback = false) {
  if (v == null) return fallback;
  switch (String(v).toLowerCase().trim()) {
    case "1":
    case "true":
    case "yes":
    case "y":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "n":
    case "off":
      return false;
    default:
      return fallback;
  }
}
/**
 * Reads the resize/quality headers into an options object.
 *
 * - quality: x-jpeg-quality clamped to 1..100 (default 100). Values below 1,
 *   including 0, are raised to 1 because sharp's JPEG encoder rejects
 *   anything outside 1..100.
 * - width / height / maxDim: positive pixel counts up to 20000, or null when absent.
 * - fit: one of inside|cover|contain|fill|outside (default "inside").
 * - withoutEnlargement: boolean (default true).
 *
 * @param {object} req - request; reads `headers`.
 * @returns {{quality: number, width: ?number, height: ?number, maxDim: ?number, fit: string, withoutEnlargement: boolean}}
 */
function parseResizeOptions(req) {
  const { headers } = req;
  const allowedFits = ["inside", "cover", "contain", "fill", "outside"];

  const quality = clampInt(headers["x-jpeg-quality"], 1, 100, 100);
  // A fallback of 0 marks "header absent"; it is turned into null below.
  const width = clampInt(headers["x-width"], 1, 20000, 0) || null;
  const height = clampInt(headers["x-height"], 1, 20000, 0) || null;
  const maxDim = clampInt(headers["x-max-dimension"], 1, 20000, 0) || null;

  const fitRaw = String(headers["x-fit"] || "inside").toLowerCase();
  const fit = allowedFits.includes(fitRaw) ? fitRaw : "inside";
  const withoutEnlargement = parseBool(headers["x-without-enlargement"], true);

  return { quality, width, height, maxDim, fit, withoutEnlargement };
}
/**
 * Adds the optional resize step and the JPEG encoder settings to a pipeline.
 * An explicit width and/or height takes precedence; otherwise `maxDim` bounds
 * both sides with fit "inside"; with neither, no resize is applied.
 * @param {object} pipeline - object exposing chainable `resize()` and `jpeg()`.
 * @param {object} opts - options as produced by parseResizeOptions.
 * @returns {object} whatever `jpeg()` returns (the pipeline, for chaining).
 */
function applyResizeAndJpeg(pipeline, opts) {
  const { width, height, maxDim, fit, withoutEnlargement, quality } = opts;

  let resizeArgs = null;
  if (width || height) {
    resizeArgs = {
      width: width ?? undefined,
      height: height ?? undefined,
      fit,
      withoutEnlargement,
    };
  } else if (maxDim) {
    resizeArgs = { width: maxDim, height: maxDim, fit: "inside", withoutEnlargement };
  }

  const sized = resizeArgs ? pipeline.resize(resizeArgs) : pipeline;
  const jpegSettings = {
    quality,
    chromaSubsampling: "4:4:4",
    mozjpeg: true,
    progressive: true,
  };
  return sized.jpeg(jpegSettings);
}
// ---------------- Core converters ----------------
/**
 * Decodes an image with sharp and re-encodes it as JPEG using the shared
 * resize/encoder settings.
 * @param {Buffer} inputBuffer - encoded source image.
 * @param {object} opts - options as produced by parseResizeOptions.
 * @returns {Promise<Buffer>} JPEG bytes.
 */
async function toJpegWithSharp(inputBuffer, opts) {
  const decodeOptions = {
    failOnError: false,
    limitInputPixels: 200e6, // safety cap on decoded pixel count
  };
  // rotate() is called with no angle, as in sharp's documented auto-orient usage.
  const oriented = sharp(inputBuffer, decodeOptions).rotate();
  const encoder = applyResizeAndJpeg(oriented, opts);
  return encoder.toBuffer();
}
/**
 * Promise wrapper around libheif-js's callback-style `display(bufferObj, cb)`.
 * @param {object} img - decoded image exposing get_width(), get_height() and display().
 * @returns {Promise<{width: number, height: number, rgba: Uint8Array}>}
 *   Rejects when display() reports no data or when any call on `img` throws.
 */
function heifDisplayToRGBA(img) {
  return new Promise((resolve, reject) => {
    // A synchronous throw inside a Promise executor rejects the promise,
    // so failures from get_width()/get_height()/display() surface as rejections.
    const width = img.get_width();
    const height = img.get_height();
    const pixels = new Uint8Array(width * height * 4);
    const target = { data: pixels, width, height, channels: 4 };

    const onDisplayed = (out) => {
      if (out && out.data) {
        // Use the returned data when it is a Uint8Array; otherwise fall back to our buffer.
        const rgba = out.data instanceof Uint8Array ? out.data : pixels;
        resolve({ width, height, rgba });
        return;
      }
      reject(new Error("libheif-js display() failed (returned null)"));
    };

    img.display(target, onDisplayed);
  });
}
/**
 * Decodes the first image of a HEIC/HEIF file with libheif-js and encodes it
 * as JPEG through sharp, using the shared resize/encoder settings.
 * @param {Buffer} inputBuffer - HEIC/HEIF bytes.
 * @param {object} opts - options as produced by parseResizeOptions.
 * @returns {Promise<Buffer>} JPEG bytes.
 * @throws {Error} when libheif-js is unavailable or decoding yields no images.
 */
async function heicToJpegWithWasm(inputBuffer, opts) {
  if (!libheif?.HeifDecoder) {
    throw new Error("libheif-js not available (HeifDecoder missing)");
  }
  const decoder = new libheif.HeifDecoder();
  const images = decoder.decode(inputBuffer);
  if (!images || images.length === 0) {
    throw new Error("WASM HEIF decode produced no images");
  }

  try {
    const { width, height, rgba } = await heifDisplayToRGBA(images[0]);
    // Encode to JPEG with sharp (consistent output settings)
    const pipeline = sharp(Buffer.from(rgba), { raw: { width, height, channels: 4 } });
    // `await` here so the cleanup below runs only after encoding has finished.
    return await applyResizeAndJpeg(pipeline, opts).toBuffer();
  } finally {
    // Release every decoded image so a long-running server does not
    // accumulate decoder memory per request. The call is guarded because
    // free() may not exist on every libheif-js build (assumption to confirm).
    for (const image of images) {
      try {
        image?.free?.();
      } catch {
        // Cleanup is best effort; a failed free must not mask the real result.
      }
    }
  }
}
/**
 * Renders a PDF to a single JPEG with `pdftoppm -singlefile` and passes the
 * result through the shared sharp JPEG pipeline. Temp files live under /tmp
 * and are removed whether or not rendering succeeds.
 * @param {Buffer} inputBuffer - PDF bytes.
 * @param {object} opts - options as produced by parseResizeOptions.
 * @param {number} [dpi=300] - render resolution passed to pdftoppm's -r.
 * @returns {Promise<Buffer>} JPEG bytes.
 */
async function pdfFirstPageToJpeg(inputBuffer, opts, dpi = 300) {
  const outPrefix = `/tmp/${randomUUID()}`;
  const pdfPath = `${outPrefix}.pdf`;
  const renderedPath = `${outPrefix}.jpg`; // pdftoppm appends ".jpg" to the prefix

  try {
    await fs.writeFile(pdfPath, inputBuffer);
    const renderArgs = ["-jpeg", "-r", String(dpi), "-singlefile", pdfPath, outPrefix];
    await execFilePromise("pdftoppm", renderArgs);
    const pageJpg = await fs.readFile(renderedPath);
    return toJpegWithSharp(pageJpg, opts);
  } finally {
    await safeUnlink(pdfPath);
    await safeUnlink(renderedPath);
  }
}
/**
 * Page-by-page PDF rendering:
 * Uses pdftoppm -f N -l N -singlefile to render one page at a time,
 * keeping /tmp usage bounded (one rendered JPEG at a time). Each page is
 * re-encoded with the shared JPEG settings and appended to `res.archive`
 * as `NNN.jpg` (zero-padded page number).
 *
 * The end of the document is detected in two ways: pdftoppm exits cleanly
 * without writing a file, or pdftoppm fails with a "Wrong page range" error
 * after at least one page was produced. Without the second check, such an
 * error on a document shorter than `maxPages` would abort the job after its
 * pages had already been appended.
 *
 * @param {object} params
 * @param {Buffer} params.inputPdfBuffer - PDF bytes.
 * @param {object} params.res - response; must carry the archive as `res.archive`.
 * @param {object} params.req - request; used for abort checks and `requestId`.
 * @param {object} params.opts - options as produced by parseResizeOptions.
 * @param {number} params.dpi - render resolution passed to pdftoppm's -r.
 * @param {number} params.maxPages - upper bound on pages rendered.
 * @param {string} [params.outNamePrefix="page"] - suffix used in the temp file name only.
 * @returns {Promise<{produced: number}>} number of pages appended.
 * @throws {Error} when no page could be rendered (and the request was not aborted),
 *   or when pdftoppm/sharp fail for any other reason.
 */
async function pdfPagesToZipStreamPageByPage({
  inputPdfBuffer,
  res,
  req,
  opts,
  dpi,
  maxPages,
  outNamePrefix = "page",
}) {
  const requestId = req.requestId;
  const jobId = randomUUID();
  const pdfPath = `/tmp/${jobId}.pdf`;
  const outPrefix = `/tmp/${jobId}-${outNamePrefix}`;
  const renderedPath = `${outPrefix}.jpg`; // pdftoppm writes `${outPrefix}.jpg`
  let produced = 0;

  try {
    await fs.writeFile(pdfPath, inputPdfBuffer);

    for (let page = 1; page <= maxPages; page++) {
      if (isAborted(req, res)) break;

      // Render a single page to `${outPrefix}.jpg`
      try {
        await execFilePromise("pdftoppm", [
          "-jpeg",
          "-r",
          String(dpi),
          "-f",
          String(page),
          "-l",
          String(page),
          "-singlefile",
          pdfPath,
          outPrefix,
        ]);
      } catch (e) {
        // Asking for a page past the end: treat as end-of-document once we
        // have output. On the first page it is a genuine failure.
        const pastLastPage = produced > 0 && /wrong page range/i.test(String(e?.message));
        if (pastLastPage) break;
        throw e;
      }

      let pageBuf;
      try {
        pageBuf = await fs.readFile(renderedPath);
      } catch {
        // pdftoppm exited cleanly but wrote nothing: assume we're past the last page.
        break;
      }

      if (isAborted(req, res)) {
        await safeUnlink(renderedPath);
        break;
      }

      const jpegBuf = await toJpegWithSharp(pageBuf, opts);
      await safeUnlink(renderedPath);
      if (isAborted(req, res)) break;

      const n = String(page).padStart(3, "0");
      res.archive.append(jpegBuf, { name: `${n}.jpg` });
      produced++;
    }

    if (produced === 0 && !isAborted(req, res)) {
      // If we produced nothing, treat as PDF render failure.
      throw new Error("PDF render produced no pages");
    }
    return { produced };
  } catch (e) {
    console.error(JSON.stringify({ requestId, err: String(e?.stack || e) }));
    throw e;
  } finally {
    await safeUnlink(pdfPath);
    // The rendered page is removed each iteration; this covers partial failures.
    await safeUnlink(renderedPath);
  }
}
// ---------------- Endpoints ----------------
// Single JPEG output (images + PDF first page)
// POST /convert: converts the uploaded image (or the first page of a PDF)
// to a single JPEG.
// - Rejects requests whose body was not parsed into a non-empty Buffer
//   (express.raw leaves req.body as a non-Buffer value when it parses nothing).
// - PDFs honour the x-pdf-dpi header (72..600, default 300), matching /convert/pdf.
// - Other inputs go through sharp first; HEIC/HEIF-looking inputs fall back
//   to the WASM decoder when sharp fails.
app.post("/convert", async (req, res) => {
  const requestId = req.requestId;

  // Sends the JPEG unless the client has gone away in the meantime.
  const sendJpeg = (jpeg) => {
    if (isAborted(req, res)) return undefined;
    res.setHeader("Content-Type", "image/jpeg");
    return res.status(200).send(jpeg);
  };

  try {
    if (!requireAuth(req, res)) return;
    if (isAborted(req, res)) return;

    const input = req.body;
    if (!Buffer.isBuffer(input) || input.length === 0) {
      return sendError(res, 400, "empty_body", "Empty body", requestId);
    }
    const opts = parseResizeOptions(req);

    // PDF: handle via poppler
    if (isPdfRequest(req)) {
      const dpi = clampInt(req.headers["x-pdf-dpi"], 72, 600, 300);
      const jpeg = await pdfFirstPageToJpeg(input, opts, dpi);
      return sendJpeg(jpeg);
    }

    // Non-PDF: validate input is a decodable raster image (or HEIC)
    await assertSupportedRasterImage(input, req);

    let jpeg;
    try {
      // Try sharp first (fast path)
      jpeg = await toJpegWithSharp(input, opts);
    } catch (sharpErr) {
      // Only HEIC/HEIF-looking inputs get the WASM fallback.
      if (!looksLikeHeic(input)) throw sharpErr;
      jpeg = await heicToJpegWithWasm(input, opts);
    }
    return sendJpeg(jpeg);
  } catch (e) {
    if (e?.statusCode === 415) {
      return sendError(res, 415, e.code || "unsupported_media_type", e.message, requestId);
    }
    console.error(JSON.stringify({ requestId, err: String(e?.stack || e) }));
    return sendError(res, 500, "conversion_failed", "Conversion failed", requestId);
  }
});
// PDF all pages -> ZIP of JPEG pages (page-by-page rendering to keep /tmp bounded)
// POST /convert/pdf: renders every page of the uploaded PDF (up to
// x-pdf-max-pages) to JPEG and streams them back as a zip archive.
// - Rejects requests whose body was not parsed into a non-empty Buffer.
// - If rendering fails before any zip bytes were sent, the archive is
//   detached and the zip Content-Disposition is removed, so the JSON error
//   is not delivered as a file download.
// - Once streaming has started, the only option on failure is to end the response.
app.post("/convert/pdf", async (req, res) => {
  const requestId = req.requestId;

  // More time for multi-page zips
  req.setTimeout(DEFAULT_REQ_TIMEOUT_PDF_MS);
  res.setTimeout(DEFAULT_REQ_TIMEOUT_PDF_MS);

  let archive = null;
  try {
    if (!requireAuth(req, res)) return;
    if (isAborted(req, res)) return;

    const input = req.body;
    if (!Buffer.isBuffer(input) || input.length === 0) {
      return sendError(res, 400, "empty_body", "Empty body", requestId);
    }
    if (!isPdfRequest(req)) {
      return sendError(res, 415, "unsupported_media_type", "This endpoint only accepts PDFs", requestId);
    }

    const opts = parseResizeOptions(req);
    const dpi = clampInt(req.headers["x-pdf-dpi"], 72, 600, 300);
    const maxPages = clampInt(req.headers["x-pdf-max-pages"], 1, 200, 50);

    res.status(200);
    res.setHeader("Content-Type", "application/zip");
    res.setHeader("Content-Disposition", `attachment; filename="pdf-pages-${randomUUID()}.zip"`);

    archive = archiver("zip", { zlib: { level: 6 } });
    // Attach archive to res for the helper
    res.archive = archive;

    // Abort work if client disconnects
    const abortArchive = () => {
      try {
        archive?.abort();
      } catch {
        // Aborting is best effort.
      }
    };
    res.on("close", abortArchive);
    res.on("aborted", abortArchive);

    archive.on("error", (err) => {
      console.error(JSON.stringify({ requestId, err: String(err?.stack || err) }));
      try {
        // If we already started streaming, we can only end.
        res.end();
      } catch {
        // Response already unusable.
      }
    });
    archive.pipe(res);

    // Render and append pages one-at-a-time
    await pdfPagesToZipStreamPageByPage({
      inputPdfBuffer: input,
      res,
      req,
      opts,
      dpi,
      maxPages,
    });

    if (!isAborted(req, res)) {
      await archive.finalize();
    }
  } catch (e) {
    console.error(JSON.stringify({ requestId, err: String(e?.stack || e) }));

    // If we already started streaming a zip, we can't reliably send JSON.
    if (res.headersSent) {
      try {
        res.end();
      } catch {
        // Response already unusable.
      }
      return;
    }

    // Nothing has been streamed yet: detach the archive so it cannot write
    // into (or end) the response, and drop the download header set above.
    try {
      archive?.unpipe(res);
    } catch {
      // Detaching is best effort.
    }
    res.removeHeader("Content-Disposition");

    const detail = String(e?.message || "");
    const missingDependency =
      detail.includes("Missing dependency: pdftoppm") || detail.includes("ENOENT");
    const msg = missingDependency ? "Server missing PDF rendering dependency" : "Conversion failed";
    return sendError(res, 500, "conversion_failed", msg, requestId);
  }
});
// Oversize handler
// Error middleware: turns the body parser's "entity.too.large" error into a
// 413 JSON response; every other error is passed on unchanged.
//
// The raw body parser is mounted before the request-context middleware, and
// Express skips regular middleware once an error is raised, so req.requestId
// is normally unset here. Derive the id the same way that middleware does so
// the 413 still carries a request id in its body and x-request-id header.
app.use((err, req, res, next) => {
  if (err?.type !== "entity.too.large") {
    return next(err);
  }

  let requestId = req.requestId;
  if (!requestId) {
    requestId = String(req.headers["x-request-id"] || "").trim() || randomUUID();
    if (!res.headersSent) {
      res.setHeader("x-request-id", requestId);
    }
  }
  return sendError(res, 413, "payload_too_large", "Payload too large (max 30mb)", requestId);
});
// Start the HTTP server on all interfaces; PORT overrides the default port
// when it parses to a non-zero number.
const DEFAULT_PORT = 8080;
const port = Number(process.env.PORT) || DEFAULT_PORT;
const announceListening = () => {
  console.log(`converter listening on 0.0.0.0:${port}`);
};
app.listen(port, "0.0.0.0", announceListening);
// ---------------- Helpers ----------------
/**
 * Promise wrapper around child_process.execFile that resolves with no value
 * on success and rejects with a descriptive Error on failure.
 * @param {string} cmd - executable to run.
 * @param {string[]} args - argument list.
 * @returns {Promise<void>}
 *   Rejects with "Missing dependency: ..." when the executable is not found,
 *   otherwise with the command, exit code, signal and any stderr output.
 */
function execFilePromise(cmd, args) {
  const describeFailure = (err, stderr) => {
    if (err.code === "ENOENT") {
      return `Missing dependency: ${cmd} (not found in PATH)`;
    }
    const meta = `cmd=${cmd} code=${err.code || "unknown"} signal=${err.signal || "none"}`;
    return stderr ? `${meta}; stderr=${stderr}` : meta;
  };

  return new Promise((resolve, reject) => {
    execFile(cmd, args, (err, _stdout, stderr) => {
      if (!err) {
        resolve();
        return;
      }
      reject(new Error(describeFailure(err, stderr)));
    });
  });
}
/**
 * Parses `value` as a number, floors it and clamps it to [min, max].
 * Returns `fallback` when the value is missing (null/undefined), blank
 * (empty or whitespace-only string) or not a finite number.
 *
 * Blank values are checked explicitly because Number("") and Number(null)
 * are 0, which would otherwise be clamped to `min` instead of falling back
 * (for example an empty header or an empty environment variable).
 *
 * @param {*} value - raw input, typically a header or env string.
 * @param {number} min - inclusive lower bound.
 * @param {number} max - inclusive upper bound.
 * @param {number} fallback - returned as-is (not clamped) when `value` is unusable.
 * @returns {number}
 */
function clampInt(value, min, max, fallback) {
  if (value == null) return fallback;
  if (typeof value === "string" && value.trim() === "") return fallback;

  const n = Number(value);
  if (!Number.isFinite(n)) return fallback;
  return Math.max(min, Math.min(max, Math.floor(n)));
}
/**
 * Best-effort file removal: does nothing for an empty path and ignores any
 * failure from unlink (for example when the file does not exist).
 * @param {string} p - path to delete.
 * @returns {Promise<void>}
 */
async function safeUnlink(p) {
  if (!p) return;
  // Cleanup must never fail the caller, so rejections are deliberately dropped.
  await fs.unlink(p).catch(() => {});
}