mirror of
https://github.com/mauriceboe/TREK.git
synced 2026-06-19 13:21:46 +00:00
8077ffab34
The place-photo cache (uploads/photos/google) grew unbounded: a Wikimedia geosearch path cached full-res originals despite requesting a 400px thumb, the writer applied no size guard, nothing reclaimed orphaned files, and backups archived the whole re-derivable cache verbatim. - Prefer the scaled `thumburl` over the full-res `info.url` in the Commons geosearch fallback. - Downscale any cached image to <=800px JPEG via the existing jimp dep, with a safe fallback to the original bytes on decode failure. - Add sweepOrphans() (orphaned meta rows + stray files) wired into the scheduler (startup + nightly), and removeIfUnreferenced() called on place delete for prompt reclamation. - Exclude the re-derivable photo/trek caches from backups; restores self-heal as the cache dirs are recreated at startup.
187 lines
7.1 KiB
TypeScript
187 lines
7.1 KiB
TypeScript
import path from 'node:path';
|
|
import fs from 'node:fs';
|
|
import fsPromises from 'node:fs/promises';
|
|
import crypto from 'node:crypto';
|
|
import { Jimp, JimpMime } from 'jimp';
|
|
import { db } from '../db/database';
|
|
|
|
// Overridable for tests (mirrors the TREK_DB_FILE seam) so the suite never touches
|
|
// the real uploads tree.
|
|
const GOOGLE_PHOTO_DIR = process.env.TREK_PLACE_PHOTO_DIR || path.join(__dirname, '../../uploads/photos/google');
|
|
const ERROR_TTL = 5 * 60 * 1000;
|
|
|
|
// Marker photos are displayed tiny — cap stored images so an oversized source
|
|
// (e.g. a Wikimedia Commons full-res original) can't bloat the cache. Matches
|
|
// THUMB_MAX/THUMB_QUALITY in memories/thumbnailService.ts.
|
|
const MAX_DIM = 800;
|
|
const JPEG_QUALITY = 80;
|
|
|
|
// In-flight dedup — prevents stampedes when multiple requests hit the same uncached placeId simultaneously
|
|
const inFlight = new Map<string, Promise<{ filePath: string; attribution: string | null } | null>>();
|
|
|
|
// In-memory set of placeIds whose file is confirmed on disk this session.
|
|
// Avoids a synchronous fs.existsSync() call on every cache hit after the first verification.
|
|
const knownOnDisk = new Set<string>();
|
|
|
|
// Ensure upload dir exists once at startup — avoids sync FS calls inside put() on every write.
|
|
try {
|
|
fs.mkdirSync(GOOGLE_PHOTO_DIR, { recursive: true });
|
|
} catch { /* already exists */ }
|
|
|
|
function filePath(placeId: string): string {
|
|
// Hash to avoid filename collisions — coords:lat:lng pseudo-IDs contain characters that
|
|
// collapse identically under sanitization (e.g. ':' and '.' both → '_')
|
|
const hash = crypto.createHash('sha1').update(placeId).digest('hex');
|
|
return path.join(GOOGLE_PHOTO_DIR, `${hash}.jpg`);
|
|
}
|
|
|
|
function proxyUrl(placeId: string): string {
|
|
return `/api/maps/place-photo/${encodeURIComponent(placeId)}/bytes`;
|
|
}
|
|
|
|
interface CachedPhoto {
|
|
photoUrl: string;
|
|
filePath: string;
|
|
attribution: string | null;
|
|
}
|
|
|
|
export function get(placeId: string): CachedPhoto | null {
|
|
const row = db.prepare(
|
|
'SELECT attribution FROM google_place_photo_meta WHERE place_id = ? AND error_at IS NULL'
|
|
).get(placeId) as { attribution: string | null } | undefined;
|
|
|
|
if (!row) return null;
|
|
|
|
const fp = filePath(placeId);
|
|
|
|
if (!knownOnDisk.has(placeId)) {
|
|
// First time this placeId is checked this session — verify the file exists on disk.
|
|
// (Guards against volume wipes or manual deletion between server restarts.)
|
|
if (!fs.existsSync(fp)) {
|
|
db.prepare('DELETE FROM google_place_photo_meta WHERE place_id = ?').run(placeId);
|
|
return null;
|
|
}
|
|
knownOnDisk.add(placeId);
|
|
}
|
|
|
|
return { photoUrl: proxyUrl(placeId), filePath: fp, attribution: row.attribution };
|
|
}
|
|
|
|
export function getErrored(placeId: string): boolean {
|
|
const row = db.prepare(
|
|
'SELECT error_at FROM google_place_photo_meta WHERE place_id = ? AND error_at IS NOT NULL'
|
|
).get(placeId) as { error_at: number } | undefined;
|
|
|
|
if (!row) return false;
|
|
return Date.now() - row.error_at < ERROR_TTL;
|
|
}
|
|
|
|
export function markError(placeId: string): void {
|
|
knownOnDisk.delete(placeId);
|
|
db.prepare(
|
|
'INSERT OR REPLACE INTO google_place_photo_meta (place_id, attribution, fetched_at, error_at) VALUES (?, NULL, ?, ?)'
|
|
).run(placeId, Date.now(), Date.now());
|
|
}
|
|
|
|
// Downscale oversized images to MAX_DIM before caching, re-encoding to JPEG.
|
|
// Defense-in-depth: keeps the cache small regardless of what the fetch path hands
|
|
// us. Jimp auto-applies EXIF orientation on read. Falls back to the original bytes
|
|
// on any failure (corrupt/unsupported format) so behaviour is never worse than before.
|
|
async function downscale(bytes: Buffer): Promise<Buffer> {
|
|
try {
|
|
const img = await Jimp.read(bytes);
|
|
if (img.bitmap.width <= MAX_DIM && img.bitmap.height <= MAX_DIM) return bytes;
|
|
img.scaleToFit({ w: MAX_DIM, h: MAX_DIM });
|
|
return await img.getBuffer(JimpMime.jpeg, { quality: JPEG_QUALITY });
|
|
} catch {
|
|
return bytes;
|
|
}
|
|
}
|
|
|
|
export async function put(placeId: string, bytes: Buffer, attribution: string | null): Promise<CachedPhoto> {
|
|
const fp = filePath(placeId);
|
|
const tmp = fp + '.tmp';
|
|
|
|
const resized = await downscale(bytes);
|
|
await fsPromises.writeFile(tmp, resized);
|
|
await fsPromises.rename(tmp, fp);
|
|
|
|
knownOnDisk.add(placeId);
|
|
|
|
db.prepare(
|
|
'INSERT OR REPLACE INTO google_place_photo_meta (place_id, attribution, fetched_at, error_at) VALUES (?, ?, ?, NULL)'
|
|
).run(placeId, attribution, Date.now());
|
|
|
|
return { photoUrl: proxyUrl(placeId), filePath: fp, attribution };
|
|
}
|
|
|
|
export function getInFlight(placeId: string): Promise<{ filePath: string; attribution: string | null } | null> | undefined {
|
|
return inFlight.get(placeId);
|
|
}
|
|
|
|
export function setInFlight(placeId: string, promise: Promise<{ filePath: string; attribution: string | null } | null>): void {
|
|
inFlight.set(placeId, promise);
|
|
promise
|
|
.finally(() => inFlight.delete(placeId))
|
|
.catch(() => { /* awaiter logs; this .catch only prevents unhandledRejection */ });
|
|
}
|
|
|
|
export function serveFilePath(placeId: string): string | null {
|
|
if (knownOnDisk.has(placeId)) return filePath(placeId);
|
|
const fp = filePath(placeId);
|
|
if (!fs.existsSync(fp)) return null;
|
|
knownOnDisk.add(placeId);
|
|
return fp;
|
|
}
|
|
|
|
// A cache entry is "referenced" while any place still points at it — either by the
|
|
// Google place_id (the dedup key) or by the stable proxy URL stored in image_url
|
|
// (covers coords: pseudo-ids, which never have a google_place_id).
|
|
function isReferenced(placeId: string): boolean {
|
|
const row = db.prepare(
|
|
'SELECT 1 FROM places WHERE google_place_id = ? OR image_url = ? LIMIT 1'
|
|
).get(placeId, proxyUrl(placeId));
|
|
return !!row;
|
|
}
|
|
|
|
function deleteEntry(placeId: string): void {
|
|
try { fs.unlinkSync(filePath(placeId)); } catch { /* already gone */ }
|
|
db.prepare('DELETE FROM google_place_photo_meta WHERE place_id = ?').run(placeId);
|
|
knownOnDisk.delete(placeId);
|
|
}
|
|
|
|
// Drop a cache entry if no place references it anymore. Called after a place delete
|
|
// for prompt reclamation; the nightly sweep is the catch-all for every other path.
|
|
export function removeIfUnreferenced(placeId: string): void {
|
|
if (isReferenced(placeId)) return;
|
|
deleteEntry(placeId);
|
|
}
|
|
|
|
// Reclaim orphaned cache files + meta rows. Runs on startup and nightly (scheduler).
|
|
// Two passes: (1) meta rows no place references; (2) stray .jpg files with no meta row.
|
|
export function sweepOrphans(): number {
|
|
let removed = 0;
|
|
|
|
const rows = db.prepare('SELECT place_id FROM google_place_photo_meta').all() as { place_id: string }[];
|
|
const keepFiles = new Set<string>();
|
|
for (const { place_id } of rows) {
|
|
if (isReferenced(place_id)) {
|
|
keepFiles.add(`${crypto.createHash('sha1').update(place_id).digest('hex')}.jpg`);
|
|
} else {
|
|
deleteEntry(place_id);
|
|
removed++;
|
|
}
|
|
}
|
|
|
|
// Pass 2: files on disk that no surviving meta row maps to (e.g. left over from a
|
|
// crash between writeFile and the DB upsert, or a meta row deleted out-of-band).
|
|
let entries: string[] = [];
|
|
try { entries = fs.readdirSync(GOOGLE_PHOTO_DIR); } catch { entries = []; }
|
|
for (const entry of entries) {
|
|
if (!entry.endsWith('.jpg') || keepFiles.has(entry)) continue;
|
|
try { fs.unlinkSync(path.join(GOOGLE_PHOTO_DIR, entry)); removed++; } catch { /* race */ }
|
|
}
|
|
|
|
return removed;
|
|
}
|