Files
PostConvert/server.js
Matthew Jackson 38b4173a6e Update server.js
normalize images for vision: apply EXIF rotation, force RGB, strip metadata

- Apply EXIF orientation before encoding to avoid sideways OCR
- Normalize colorspace to RGB for deterministic vision inputs
- Strip all EXIF/IPTC/XMP metadata from output JPEGs
- Bound image dimensions to reduce OCR latency and timeouts
- Standardize JPEG encoding settings for vision reliability
2026-01-25 11:40:51 -08:00

321 lines
9.2 KiB
JavaScript

import express from "express";
import sharp from "sharp";
import { execFile } from "child_process";
import fs from "fs/promises";
import { randomUUID, timingSafeEqual } from "crypto";
import archiver from "archiver";
import libheifModule from "libheif-js";
// Use the module's `default` export when present, otherwise the module object itself.
const libheif = libheifModule?.default ?? libheifModule;

// Express application. Every request body, whatever its content type, is
// buffered as raw bytes (up to 30 MB) before reaching the handlers.
const app = express();
const rawBodyParser = express.raw({ type: "*/*", limit: "30mb" });
app.use(rawBodyParser);

// Liveness endpoints: both answer 200 with a short plain-text body.
app.get("/", (_req, res) => {
  res.status(200).send("postconvert: ok");
});
app.get("/health", (_req, res) => {
  res.status(200).send("ok");
});
/* ------------------------------------------------------------------ */
/* Request context / logging */
/* ------------------------------------------------------------------ */
// Millisecond helpers so the bounds below read as durations.
const SECOND_MS = 1_000;
const MINUTE_MS = 60 * SECOND_MS;

// General request timeout: REQ_TIMEOUT_MS env var, clamped to 5 s .. 10 min,
// falling back to 2 min when the variable is not a finite number.
const DEFAULT_REQ_TIMEOUT_MS = clampInt(
  process.env.REQ_TIMEOUT_MS,
  5 * SECOND_MS,
  10 * MINUTE_MS,
  2 * MINUTE_MS
);

// PDF request timeout: REQ_TIMEOUT_PDF_MS env var, clamped to 10 s .. 30 min,
// falling back to 5 min when the variable is not a finite number.
const DEFAULT_REQ_TIMEOUT_PDF_MS = clampInt(
  process.env.REQ_TIMEOUT_PDF_MS,
  10 * SECOND_MS,
  30 * MINUTE_MS,
  5 * MINUTE_MS
);
/**
 * Request-context middleware.
 *
 * - Assigns a request id (the trimmed `x-request-id` header, or a fresh UUID)
 *   to `req.requestId` and echoes it back in the `x-request-id` response header.
 * - Applies the request/response timeout: PDF requests (as detected by
 *   `isPdfRequest`) use DEFAULT_REQ_TIMEOUT_PDF_MS, everything else uses
 *   DEFAULT_REQ_TIMEOUT_MS.
 * - Logs one JSON line per finished response with method, path, status,
 *   inbound byte count and elapsed milliseconds.
 */
app.use((req, res, next) => {
  const requestId =
    String(req.headers["x-request-id"] || "").trim() || randomUUID();
  req.requestId = requestId;
  res.setHeader("x-request-id", requestId);

  const started = Date.now();

  // The PDF timeout is configured separately from the general one; pick it
  // for PDF requests so the dedicated setting actually takes effect.
  const timeoutMs = isPdfRequest(req)
    ? DEFAULT_REQ_TIMEOUT_PDF_MS
    : DEFAULT_REQ_TIMEOUT_MS;
  req.setTimeout(timeoutMs);
  res.setTimeout(timeoutMs);

  res.on("finish", () => {
    const ms = Date.now() - started;
    // Prefer the declared content-length; fall back to the buffered body size.
    const len =
      Number(req.headers["content-length"] || 0) ||
      (req.body?.length ?? 0) ||
      0;
    console.log(
      JSON.stringify({
        requestId,
        method: req.method,
        path: req.originalUrl,
        status: res.statusCode,
        bytesIn: len,
        ms,
      })
    );
  });

  next();
});
/**
 * Reports whether the exchange can no longer be answered.
 *
 * @param {object} req - Request; its `aborted` flag is checked.
 * @param {object} res - Response; its `writableEnded` and `destroyed` flags are checked.
 * @returns {boolean} true when any of the three flags is truthy.
 */
function isAborted(req, res) {
  if (req.aborted) {
    return true;
  }
  if (res.writableEnded) {
    return true;
  }
  return Boolean(res.destroyed);
}
/**
 * Sends a JSON error response of the form `{ error, message, requestId }`.
 *
 * If headers were already sent, no JSON can be written: the response is simply
 * ended (best effort, any exception from `end()` is ignored).
 *
 * @param {object} res - Response object.
 * @param {number} status - HTTP status code to send.
 * @param {string} code - Machine-readable error code (sent as `error`).
 * @param {string} message - Human-readable message.
 * @param {string} requestId - Request id echoed in the payload.
 */
function sendError(res, status, code, message, requestId) {
  if (!res.headersSent) {
    const payload = { error: code, message, requestId };
    res.status(status).json(payload);
    return;
  }

  try {
    res.end();
  } catch {
    // Best effort only: nothing more can be done with this response.
  }
}
/* ------------------------------------------------------------------ */
/* Auth */
/* ------------------------------------------------------------------ */
/**
 * Checks the `Authorization` header against `Bearer <CONVERTER_TOKEN>`.
 *
 * On failure (including when CONVERTER_TOKEN is not configured) a 401 JSON
 * error is sent through `sendError` and `false` is returned, so the caller
 * should stop handling the request.
 *
 * @param {object} req - Request; reads `headers.authorization` and `requestId`.
 * @param {object} res - Response used to send the 401 on failure.
 * @returns {boolean} true when the request is authorized.
 */
function requireAuth(req, res) {
  const token = process.env.CONVERTER_TOKEN;
  const provided = Buffer.from(String(req.headers.authorization || ""));
  const expected = Buffer.from(`Bearer ${token}`);

  // Compare the secret in constant time. timingSafeEqual throws on length
  // mismatch, so the length check must come first.
  const authorized =
    Boolean(token) &&
    provided.length === expected.length &&
    timingSafeEqual(provided, expected);

  if (!authorized) {
    sendError(res, 401, "unauthorized", "Unauthorized", req.requestId);
    return false;
  }
  return true;
}
/* ------------------------------------------------------------------ */
/* Type detection */
/* ------------------------------------------------------------------ */
/**
 * Decides whether a request carries a PDF, based on headers only.
 *
 * @param {object} req - Request; reads the `content-type` and `x-filename` headers.
 * @returns {boolean} true when the content type starts with `application/pdf`
 *   or the filename header ends with `.pdf` (both compared case-insensitively).
 */
function isPdfRequest(req) {
  const lowerHeader = (name) => String(req.headers[name] || "").toLowerCase();

  if (lowerHeader("content-type").startsWith("application/pdf")) {
    return true;
  }
  return lowerHeader("x-filename").endsWith(".pdf");
}
/**
 * Sniffs a buffer for an ISO-BMFF `ftyp` box that mentions a HEIF-family brand.
 *
 * Bytes 4..8 must read `ftyp`; bytes 8..256 (or up to the end of the buffer)
 * are then searched for any of the known brand strings.
 *
 * @param {Buffer} buf - Candidate file contents.
 * @returns {boolean} true when a HEIF-family brand is found; false for missing
 *   or too-short (< 16 bytes) input.
 */
function looksLikeHeic(buf) {
  if (!buf || buf.length < 16) {
    return false;
  }

  const boxType = buf.toString("ascii", 4, 8);
  if (boxType !== "ftyp") {
    return false;
  }

  const scanEnd = Math.min(buf.length, 256);
  const brandArea = buf.toString("ascii", 8, scanEnd);
  const heifBrands = ["heic", "heif", "heix", "hevc", "hevx", "mif1", "msf1"];
  return heifBrands.some((brand) => brandArea.includes(brand));
}
/**
 * Verifies that the input is something the converter can attempt to decode.
 *
 * HEIC-looking input is accepted without further checks. Anything else must
 * have metadata readable by sharp.
 *
 * @param {Buffer} input - Raw request body.
 * @param {object} req - Request (currently not read).
 * @returns {Promise<void>}
 * @throws {Error} "Unsupported image input" carrying `statusCode: 415` and
 *   `code: "unsupported_media_type"` when sharp cannot read the metadata.
 */
async function assertSupportedRaster(input, req) {
  if (looksLikeHeic(input)) {
    return;
  }

  let readable = true;
  try {
    const probe = sharp(input, { failOnError: false });
    await probe.metadata();
  } catch {
    readable = false;
  }

  if (!readable) {
    const unsupported = new Error("Unsupported image input");
    unsupported.statusCode = 415;
    unsupported.code = "unsupported_media_type";
    throw unsupported;
  }
}
/* ------------------------------------------------------------------ */
/* Resize / quality options */
/* ------------------------------------------------------------------ */
/**
 * Parses a loosely-typed boolean flag.
 *
 * @param {*} v - Value to interpret; stringified, lower-cased and trimmed.
 * @param {boolean} [fallback=false] - Result for null/undefined or unrecognized input.
 * @returns {boolean} true for 1/true/yes/y/on, false for 0/false/no/n/off,
 *   otherwise `fallback`.
 */
function parseBool(v, fallback = false) {
  if (v === null || v === undefined) {
    return fallback;
  }

  switch (String(v).toLowerCase().trim()) {
    case "1":
    case "true":
    case "yes":
    case "y":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "n":
    case "off":
      return false;
    default:
      return fallback;
  }
}
/**
 * Builds conversion options from request headers.
 *
 * @param {object} req - Request; reads `x-jpeg-quality`, `x-max-dimension`
 *   and `x-without-enlargement`.
 * @returns {{quality: number, maxDim: number, fit: string, withoutEnlargement: boolean}}
 *   quality is clamped to 40..100 (default 85), maxDim to 500..6000
 *   (default 2000), fit is always "inside", withoutEnlargement defaults to true.
 */
function parseOptions(req) {
  const { headers } = req;

  const quality = clampInt(headers["x-jpeg-quality"], 40, 100, 85);
  const maxDim = clampInt(headers["x-max-dimension"], 500, 6000, 2000);
  const withoutEnlargement = parseBool(headers["x-without-enlargement"], true);

  return { quality, maxDim, fit: "inside", withoutEnlargement };
}
/* ------------------------------------------------------------------ */
/* Vision-safe normalization */
/* ------------------------------------------------------------------ */
/**
 * Normalizes an image into a JPEG suitable for vision/OCR input.
 *
 * Steps: apply EXIF orientation, convert to the "rgb" colourspace, optionally
 * bound both dimensions to `opts.maxDim` (fit "inside"), then encode as a
 * progressive mozjpeg JPEG with 4:4:4 chroma subsampling.
 *
 * Metadata: no metadata-retention method is called on the pipeline, so sharp's
 * default behaviour (strip EXIF/IPTC/XMP from the output) applies. The former
 * `.withMetadata(false)` call was removed because `withMetadata` is an opt-in
 * to KEEP metadata; NOTE(review): on sharp versions that ignore non-object
 * arguments there, it retained metadata — confirm against the pinned version.
 *
 * @param {Buffer} input - Encoded image bytes, or raw pixel data when `opts.raw` is set.
 * @param {object} opts
 * @param {number} opts.quality - JPEG quality.
 * @param {number} [opts.maxDim] - Maximum width/height; falsy disables resizing.
 * @param {boolean} [opts.withoutEnlargement] - Passed through to the resize step.
 * @param {{width: number, height: number, channels: number}} [opts.raw] -
 *   Describes `input` as raw pixels; forwarded to the sharp constructor.
 * @returns {Promise<Buffer>} The encoded JPEG (whatever `toBuffer()` yields).
 */
function normalizeForVision(input, opts) {
  const inputOptions = {
    failOnError: false,
    limitInputPixels: 200e6,
  };
  // Raw pixel buffers (e.g. decoded HEIC) carry no header, so sharp must be
  // told their geometry; without this the buffer is parsed as an encoded file.
  if (opts.raw) {
    inputOptions.raw = opts.raw;
  }

  let pipeline = sharp(input, inputOptions)
    .rotate() // apply EXIF orientation
    .toColorspace("rgb"); // normalize colorspace

  if (opts.maxDim) {
    pipeline = pipeline.resize({
      width: opts.maxDim,
      height: opts.maxDim,
      fit: "inside",
      withoutEnlargement: opts.withoutEnlargement,
    });
  }

  return pipeline
    .jpeg({
      quality: opts.quality,
      chromaSubsampling: "4:4:4",
      mozjpeg: true,
      progressive: true,
    })
    .toBuffer();
}
/* ------------------------------------------------------------------ */
/* HEIC via WASM */
/* ------------------------------------------------------------------ */
/**
 * Renders a decoded libheif image into a freshly allocated RGBA byte array.
 *
 * @param {object} img - Decoded image exposing `get_width()`, `get_height()`
 *   and `display(target, callback)`.
 * @returns {Promise<{width: number, height: number, rgba: Uint8Array}>}
 *   Resolves once `display` reports success. Rejects when the dimensions are
 *   not positive integers, when `display` throws, or when `display` invokes
 *   its callback with a falsy value (libheif-js signals a failed render that
 *   way; previously such a failure resolved with an all-zero buffer).
 */
function heifDisplayToRGBA(img) {
  return new Promise((resolve, reject) => {
    try {
      const width = img.get_width();
      const height = img.get_height();
      if (!Number.isInteger(width) || !Number.isInteger(height) || width <= 0 || height <= 0) {
        reject(new Error("HEIC decode failed: invalid image dimensions"));
        return;
      }

      // 4 bytes per pixel (R, G, B, A).
      const rgba = new Uint8Array(width * height * 4);
      img.display({ data: rgba, width, height, channels: 4 }, (displayData) => {
        if (!displayData) {
          reject(new Error("HEIC decode failed"));
          return;
        }
        resolve({ width, height, rgba });
      });
    } catch (e) {
      reject(e);
    }
  });
}
/**
 * Converts a HEIC buffer to a normalized JPEG using the libheif decoder.
 *
 * Only the first decoded image is used. Its RGBA pixels are handed to
 * `normalizeForVision` together with a `raw` descriptor (width, height,
 * 4 channels) merged into `opts`.
 *
 * @param {Buffer} input - HEIC file contents.
 * @param {object} opts - Conversion options forwarded to `normalizeForVision`.
 * @returns {Promise<Buffer>} Whatever `normalizeForVision` resolves to.
 * @throws {Error} "libheif-js unavailable" when no decoder class is exposed;
 *   "HEIC decode failed" when decoding yields no images.
 */
async function heicToJpeg(input, opts) {
  const Decoder = libheif?.HeifDecoder;
  if (!Decoder) {
    throw new Error("libheif-js unavailable");
  }

  const decoder = new Decoder();
  const frames = decoder.decode(input);
  if (!frames?.length) {
    throw new Error("HEIC decode failed");
  }

  const firstFrame = await heifDisplayToRGBA(frames[0]);
  const pixels = Buffer.from(firstFrame.rgba);
  const rawDescriptor = {
    width: firstFrame.width,
    height: firstFrame.height,
    channels: 4,
  };
  return normalizeForVision(pixels, { ...opts, raw: rawDescriptor });
}
/* ------------------------------------------------------------------ */
/* PDF handling */
/* ------------------------------------------------------------------ */
/**
 * Rasterizes the first page of a PDF with `pdftoppm` and normalizes the result.
 *
 * The PDF is written to a uniquely named file under /tmp, rendered with
 * `pdftoppm -jpeg -singlefile -r <dpi>`, read back, and passed to
 * `normalizeForVision`. Both temporary files are removed afterwards whether
 * or not the conversion succeeded.
 *
 * @param {Buffer} input - PDF file contents.
 * @param {object} opts - Conversion options forwarded to `normalizeForVision`.
 * @param {number} [dpi=300] - Render resolution passed to `pdftoppm -r`.
 * @returns {Promise<Buffer>} The normalized JPEG.
 * @throws Propagates failures from writing, rendering, reading or normalizing.
 */
async function pdfFirstPageToJpeg(input, opts, dpi = 300) {
  const id = randomUUID();
  const prefix = `/tmp/${id}`;
  const pdf = `${prefix}.pdf`;
  const out = `${prefix}.jpg`;
  try {
    await fs.writeFile(pdf, input);
    await execFilePromise("pdftoppm", [
      "-jpeg",
      "-singlefile",
      "-r",
      String(dpi),
      pdf,
      prefix,
    ]);
    const buf = await fs.readFile(out);
    // Await here (rather than returning the bare promise) so a normalization
    // failure is observed before the async cleanup starts; otherwise it could
    // reject with no handler attached while `finally` is still awaiting.
    return await normalizeForVision(buf, opts);
  } finally {
    // The two removals are independent, so run them together.
    await Promise.all([safeUnlink(pdf), safeUnlink(out)]);
  }
}
/* ------------------------------------------------------------------ */
/* Routes */
/* ------------------------------------------------------------------ */
/**
 * POST /convert — turns the raw request body into a normalized JPEG.
 *
 * Flow: authenticate, reject empty bodies (400), then either rasterize the
 * first PDF page or normalize a raster image. If sharp cannot convert an
 * image that looks like HEIC, the libheif decoder is tried instead.
 *
 * Errors: an error carrying a 4xx `statusCode` (e.g. the 415 raised for
 * unsupported input) is reported with that status, its `code` and message;
 * anything else is reported as 500 "conversion_failed". Every failure is
 * logged as a JSON line including the underlying cause when there is one.
 */
app.post("/convert", async (req, res) => {
  const sendJpeg = (jpeg) => {
    res.setHeader("Content-Type", "image/jpeg");
    return res.send(jpeg);
  };

  try {
    if (!requireAuth(req, res)) return;
    if (!req.body?.length) {
      return sendError(res, 400, "empty_body", "Empty body", req.requestId);
    }

    const opts = parseOptions(req);

    if (isPdfRequest(req)) {
      return sendJpeg(await pdfFirstPageToJpeg(req.body, opts));
    }

    await assertSupportedRaster(req.body, req);

    let jpeg;
    try {
      jpeg = await normalizeForVision(req.body, opts);
    } catch (sharpErr) {
      if (!looksLikeHeic(req.body)) {
        // Keep the original failure attached for the log line below.
        throw new Error("Image conversion failed", { cause: sharpErr });
      }
      jpeg = await heicToJpeg(req.body, opts);
    }
    // Sending happens outside the inner try so a send failure can never be
    // mistaken for a conversion failure and trigger the HEIC fallback.
    return sendJpeg(jpeg);
  } catch (e) {
    console.error(
      JSON.stringify({
        requestId: req.requestId,
        err: String(e),
        cause: e?.cause ? String(e.cause) : undefined,
      })
    );

    const status = Number(e?.statusCode);
    if (Number.isInteger(status) && status >= 400 && status < 500) {
      return sendError(
        res,
        status,
        e.code || "bad_request",
        e.message,
        req.requestId
      );
    }
    return sendError(res, 500, "conversion_failed", "Conversion failed", req.requestId);
  }
});
/* ------------------------------------------------------------------ */
/* Helpers */
/* ------------------------------------------------------------------ */
/**
 * Promise wrapper around `execFile` that discards stdout.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {Promise<void>} Resolves with no value when the command succeeds.
 *   Rejects with "Missing dependency: <cmd>" when the executable is not found
 *   (ENOENT); otherwise with an Error whose message is the command's stderr,
 *   or the stringified original error when stderr is empty.
 */
function execFilePromise(cmd, args) {
  const describeFailure = (err, stderr) => {
    if (err.code === "ENOENT") {
      return new Error(`Missing dependency: ${cmd}`);
    }
    return new Error(stderr || String(err));
  };

  return new Promise((resolve, reject) => {
    execFile(cmd, args, (err, _stdout, stderr) => {
      if (!err) {
        resolve();
        return;
      }
      reject(describeFailure(err, stderr));
    });
  });
}
/**
 * Parses a value as an integer and clamps it into [min, max].
 *
 * @param {*} v - Raw value (typically an env var or header string).
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @param {number} fallback - Returned when `v` is missing, blank, or not a
 *   finite number.
 * @returns {number} The floored, clamped value, or `fallback`.
 */
function clampInt(v, min, max, fallback) {
  // Number("") and Number(null) are both 0, which would silently clamp to
  // `min`; treat missing/blank input as "not provided" instead.
  if (v == null || (typeof v === "string" && v.trim() === "")) {
    return fallback;
  }
  const n = Number(v);
  if (!Number.isFinite(n)) return fallback;
  return Math.max(min, Math.min(max, Math.floor(n)));
}
/**
 * Best-effort file removal: deletes `p` and deliberately ignores any failure
 * (for example when the file does not exist).
 *
 * @param {string} p - Path of the file to remove.
 * @returns {Promise<void>} Always resolves.
 */
async function safeUnlink(p) {
  const ignoreFailure = () => {};
  await fs.unlink(p).catch(ignoreFailure);
}
/* ------------------------------------------------------------------ */
// Listen on all interfaces; the PORT env var is used when it parses to a
// non-zero number, otherwise the server falls back to 8080.
const port = Number(process.env.PORT) || 8080;

function announceListening() {
  console.log(`converter listening on :${port}`);
}

app.listen(port, "0.0.0.0", announceListening);