fix(maps): bound place-photo cache growth (Wikimedia + Google) (#1174)

The place-photo cache (uploads/photos/google) grew unbounded: a Wikimedia
geosearch path cached full-res originals despite requesting a 400px thumb,
the writer applied no size guard, nothing reclaimed orphaned files, and
backups archived the whole re-derivable cache verbatim.

- Prefer the scaled `thumburl` over the full-res `info.url` in the Commons
  geosearch fallback.
- Downscale any cached image larger than 800px on either side to a <=800px
  JPEG via the existing jimp dep (images already within bounds are stored
  unchanged), with a safe fallback to the original bytes on decode failure.
- Add sweepOrphans() (orphaned meta rows + stray files) wired into the
  scheduler (startup + nightly), and removeIfUnreferenced() called on
  place delete for prompt reclamation.
- Exclude the re-derivable photo/trek caches from backups; restores
  self-heal as the cache dirs are recreated at startup.
This commit is contained in:
jubnl
2026-06-14 23:31:02 +02:00
committed by GitHub
parent 3e9626fce9
commit 8077ffab34
10 changed files with 349 additions and 13 deletions
+1
View File
@@ -79,6 +79,7 @@ const onListen = () => {
scheduler.startDemoReset();
scheduler.startIdempotencyCleanup();
scheduler.startTrekPhotoCacheCleanup();
scheduler.startPlacePhotoCacheCleanup();
scheduler.startAirTrailSync();
const { startTokenCleanup } = require('./services/ephemeralTokens');
startTokenCleanup();
+26 -1
View File
@@ -334,6 +334,30 @@ function startTrekPhotoCacheCleanup(): void {
});
}
// Place-photo (Google/Wikimedia) cache cleanup: nightly — reclaim cached files and
// meta rows no place references anymore (deleted places/trips, overwritten image_url).
let placePhotoCacheTask: ScheduledTask | null = null;
/**
 * (Re)start the place-photo cache sweeper.
 *
 * Any previously scheduled task is stopped first. One sweep runs synchronously
 * right away, then the same sweep is scheduled for 03:30 every day in the
 * timezone taken from the TZ environment variable (falling back to 'UTC').
 * Sweep failures are logged and never propagate to the caller.
 */
function startPlacePhotoCacheCleanup(): void {
  if (placePhotoCacheTask) {
    placePhotoCacheTask.stop();
    placePhotoCacheTask = null;
  }

  // One sweep pass: load the cache module lazily, run it, and log the outcome.
  function runSweep(): void {
    try {
      const { sweepOrphans: sweepPlacePhotoOrphans } = require('./services/placePhotoCache');
      const reclaimed = sweepPlacePhotoOrphans();
      if (reclaimed > 0) {
        logInfo(`Place-photo cache cleanup: removed ${reclaimed} orphaned file(s)/row(s)`);
      }
    } catch (err: unknown) {
      const detail = err instanceof Error ? err.message : err;
      logError(`Place-photo cache cleanup: ${detail}`);
    }
  }

  // Immediate pass so orphans that predate the sweeper are reclaimed at startup.
  runSweep();

  placePhotoCacheTask = cron.schedule('30 3 * * *', runSweep, {
    timezone: process.env.TZ || 'UTC',
  });
}
// AirTrail sync: poll connected instances on an interval and reconcile linked
// flights both ways (#214). The per-tick enable gate (addon + setting) lives in
// runAirtrailSync, so toggling the addon takes effect without a restart.
@@ -366,7 +390,8 @@ function stop(): void {
if (versionCheckTask) { versionCheckTask.stop(); versionCheckTask = null; }
if (idempotencyCleanupTask) { idempotencyCleanupTask.stop(); idempotencyCleanupTask = null; }
if (trekPhotoCacheTask) { trekPhotoCacheTask.stop(); trekPhotoCacheTask = null; }
if (placePhotoCacheTask) { placePhotoCacheTask.stop(); placePhotoCacheTask = null; }
if (airtrailSyncTask) { airtrailSyncTask.stop(); airtrailSyncTask = null; }
}
export { start, stop, startDemoReset, startTripReminders, startTodoReminders, startVersionCheck, startIdempotencyCleanup, startTrekPhotoCacheCleanup, startAirTrailSync, loadSettings, saveSettings, VALID_INTERVALS };
export { start, stop, startDemoReset, startTripReminders, startTodoReminders, startVersionCheck, startIdempotencyCleanup, startTrekPhotoCacheCleanup, startPlacePhotoCacheCleanup, startAirTrailSync, loadSettings, saveSettings, VALID_INTERVALS };
+8 -1
View File
@@ -156,7 +156,14 @@ export async function createBackup(): Promise<BackupInfo> {
}
if (fs.existsSync(uploadsDir)) {
archive.directory(uploadsDir, 'uploads');
// Exclude the place-photo and trek-memory caches: both are re-derivable
// (re-fetched on demand, keyed on stable ids) and would otherwise dominate
// backup size. Restores self-heal — the cache dirs are recreated at startup.
archive.glob(
'**/*',
{ cwd: uploadsDir, ignore: ['photos/google/**', 'photos/trek/**'], nodir: true, dot: true },
{ prefix: 'uploads' },
);
}
archive.finalize();
+4 -2
View File
@@ -33,7 +33,7 @@ interface OverpassElement {
}
// Subset of a Wikimedia Commons page entry, limited to the image-info fields
// declared below.
// NOTE(review): this listing shows two `imageinfo` declarations; presumably the
// first is the pre-change line and the second (with `thumburl`) replaces it.
interface WikiCommonsPage {
imageinfo?: { url?: string; extmetadata?: { Artist?: { value?: string } } }[];
// `url` is the full-resolution original; `thumburl` is the scaled rendition
// Commons also returns when iiurlwidth is requested (see fetchWikimediaPhoto).
imageinfo?: { url?: string; thumburl?: string; extmetadata?: { Artist?: { value?: string } } }[];
}
interface GooglePlaceResult {
@@ -537,7 +537,9 @@ export async function fetchWikimediaPhoto(lat: number, lng: number, name?: strin
const mime = (info as { mime?: string })?.mime || '';
if (info?.url && (mime.startsWith('image/jpeg') || mime.startsWith('image/png'))) {
const attribution = info.extmetadata?.Artist?.value?.replace(/<[^>]+>/g, '').trim() || null;
return { photoUrl: info.url, attribution };
// iiurlwidth=400 makes Commons also return a scaled thumburl. Prefer it —
// info.url is the full-resolution original (multi-megapixel camera exports).
return { photoUrl: info.thumburl ?? info.url, attribution };
}
}
return null;
+78 -2
View File
@@ -2,11 +2,20 @@ import path from 'node:path';
import fs from 'node:fs';
import fsPromises from 'node:fs/promises';
import crypto from 'node:crypto';
import { Jimp, JimpMime } from 'jimp';
import { db } from '../db/database';
const GOOGLE_PHOTO_DIR = path.join(__dirname, '../../uploads/photos/google');
// Overridable for tests (mirrors the TREK_DB_FILE seam) so the suite never touches
// the real uploads tree.
const GOOGLE_PHOTO_DIR = process.env.TREK_PLACE_PHOTO_DIR || path.join(__dirname, '../../uploads/photos/google');
const ERROR_TTL = 5 * 60 * 1000;
// Marker photos are displayed tiny — cap stored images so an oversized source
// (e.g. a Wikimedia Commons full-res original) can't bloat the cache. Matches
// THUMB_MAX/THUMB_QUALITY in memories/thumbnailService.ts.
const MAX_DIM = 800;
const JPEG_QUALITY = 80;
// In-flight dedup — prevents stampedes when multiple requests hit the same uncached placeId simultaneously
const inFlight = new Map<string, Promise<{ filePath: string; attribution: string | null } | null>>();
@@ -74,11 +83,27 @@ export function markError(placeId: string): void {
).run(placeId, Date.now(), Date.now());
}
/**
 * Shrink an image so that neither side exceeds MAX_DIM, re-encoding the result
 * as JPEG at JPEG_QUALITY.
 *
 * Images that already fit are returned untouched (no re-encode), and any
 * failure while decoding, scaling or encoding also yields the original bytes,
 * so this step can never make caching fail.
 *
 * NOTE(review): the previous comment states that Jimp applies EXIF orientation
 * on read — confirm against the installed jimp version.
 *
 * @param bytes Raw image bytes as fetched.
 * @returns The downscaled JPEG bytes, or `bytes` itself when nothing was done.
 */
async function downscale(bytes: Buffer): Promise<Buffer> {
  try {
    const image = await Jimp.read(bytes);
    const { width, height } = image.bitmap;
    const fitsAlready = width <= MAX_DIM && height <= MAX_DIM;
    if (!fitsAlready) {
      image.scaleToFit({ w: MAX_DIM, h: MAX_DIM });
      const encoded = await image.getBuffer(JimpMime.jpeg, { quality: JPEG_QUALITY });
      return encoded;
    }
    return bytes;
  } catch {
    // Corrupt or unsupported input: keep what we were given.
    return bytes;
  }
}
export async function put(placeId: string, bytes: Buffer, attribution: string | null): Promise<CachedPhoto> {
const fp = filePath(placeId);
const tmp = fp + '.tmp';
await fsPromises.writeFile(tmp, bytes);
const resized = await downscale(bytes);
await fsPromises.writeFile(tmp, resized);
await fsPromises.rename(tmp, fp);
knownOnDisk.add(placeId);
@@ -108,3 +133,54 @@ export function serveFilePath(placeId: string): string | null {
knownOnDisk.add(placeId);
return fp;
}
/**
 * Tell whether any row in `places` still points at the given cache entry.
 *
 * A place references the entry either through its google_place_id column or
 * through an image_url equal to the proxy URL for that id (the latter covers
 * coords: pseudo-ids, which never have a google_place_id).
 */
function isReferenced(placeId: string): boolean {
  const lookup = db.prepare(
    'SELECT 1 FROM places WHERE google_place_id = ? OR image_url = ? LIMIT 1'
  );
  const proxiedUrl = proxyUrl(placeId);
  const hit = lookup.get(placeId, proxiedUrl);
  return Boolean(hit);
}
/**
 * Remove everything held for one cache entry: the file on disk, its
 * google_place_photo_meta row, and its membership in knownOnDisk.
 * File removal is best-effort — a failure to unlink is ignored.
 */
function deleteEntry(placeId: string): void {
  try {
    const target = filePath(placeId);
    fs.unlinkSync(target);
  } catch {
    // already gone
  }

  const dropMeta = db.prepare('DELETE FROM google_place_photo_meta WHERE place_id = ?');
  dropMeta.run(placeId);

  knownOnDisk.delete(placeId);
}
/**
 * Delete the cache entry for `placeId` unless some place still references it.
 * Intended for prompt reclamation right after a place is deleted; the
 * scheduled sweep remains the catch-all for every other path.
 */
export function removeIfUnreferenced(placeId: string): void {
  const stillInUse = isReferenced(placeId);
  if (!stillInUse) {
    deleteEntry(placeId);
  }
}
/**
 * Reclaim orphaned cache files and meta rows. Runs on startup and nightly
 * (scheduler).
 *
 * Pass 1 walks every google_place_photo_meta row: entries no place references
 * are deleted (file + row), and the on-disk filename of every surviving entry
 * is recorded. Pass 2 removes any `.jpg` in the cache directory that is not one
 * of those recorded filenames.
 *
 * The surviving filenames are derived through filePath() — the same helper
 * deleteEntry() uses to locate a file — rather than by re-hashing the id here,
 * so the sweep can never disagree with the rest of the module about where an
 * entry lives (a mismatch would make pass 2 delete every referenced file).
 *
 * NOTE(review): put() stages its write in a `*.tmp` file; such leftovers do not
 * end in `.jpg` and are therefore not reclaimed by this sweep.
 *
 * @returns Number of entries removed in pass 1 plus stray files removed in pass 2.
 */
export function sweepOrphans(): number {
  let removed = 0;
  const rows = db.prepare('SELECT place_id FROM google_place_photo_meta').all() as { place_id: string }[];
  const keepFiles = new Set<string>();
  for (const { place_id } of rows) {
    if (isReferenced(place_id)) {
      keepFiles.add(path.basename(filePath(place_id)));
    } else {
      deleteEntry(place_id);
      removed++;
    }
  }

  // Pass 2: .jpg files on disk that no surviving meta row maps to (e.g. the
  // process died after the file was moved into place but before its meta row
  // was written, or a meta row was deleted out-of-band).
  let entries: string[] = [];
  try {
    entries = fs.readdirSync(GOOGLE_PHOTO_DIR);
  } catch {
    // Cache directory missing or unreadable: nothing to sweep.
    entries = [];
  }
  for (const entry of entries) {
    if (!entry.endsWith('.jpg') || keepFiles.has(entry)) continue;
    try {
      fs.unlinkSync(path.join(GOOGLE_PHOTO_DIR, entry));
      removed++;
    } catch {
      // Lost a race with another deleter; the file is gone either way.
    }
  }
  return removed;
}
+25 -3
View File
@@ -14,6 +14,20 @@ import {
type KmlImportSummary,
} from './kmlImport';
import { enrichImportedPlaces, type EnrichablePlace } from './placeEnrichment';
import * as placePhotoCache from './placePhotoCache';
/**
 * After a place is gone, release its cached marker photo when nothing else
 * still references it.
 *
 * Up to two cache keys are tried: the Google place_id, and — for places whose
 * image_url is a proxy URL of the form /api/maps/place-photo/{id}/bytes — the
 * URL-decoded {id} segment. Every step is best-effort; failures are ignored.
 */
function reclaimPhotoCache(googlePlaceId: string | null, imageUrl: string | null): void {
  const cacheKeys = new Set<string>();

  if (googlePlaceId) {
    cacheKeys.add(googlePlaceId);
  }

  const proxyMatch = imageUrl?.match(/^\/api\/maps\/place-photo\/(.+)\/bytes$/);
  if (proxyMatch) {
    try {
      cacheKeys.add(decodeURIComponent(proxyMatch[1]));
    } catch {
      // malformed url
    }
  }

  cacheKeys.forEach((key) => {
    try {
      placePhotoCache.removeIfUnreferenced(key);
    } catch {
      // best-effort
    }
  });
}
/** Opt-in Places-API enrichment for list imports (#886). */
export interface ListImportOptions {
@@ -242,25 +256,33 @@ export function updatePlace(
// ---------------------------------------------------------------------------
/**
 * Delete a single place that belongs to the given trip.
 *
 * @param tripId  Trip the place must belong to.
 * @param placeId Id of the place to delete.
 * @returns false when no such place exists in the trip; true once it is deleted.
 */
export function deletePlace(tripId: string, placeId: string): boolean {
// NOTE(review): two lookups are listed here; presumably the first is the
// pre-change line and the second (selecting the photo columns) replaces it.
const place = db.prepare('SELECT id FROM places WHERE id = ? AND trip_id = ?').get(placeId, tripId);
// Read the columns that identify the cached photo before the row is deleted,
// so the cache entry can still be located afterwards.
const place = db.prepare(
'SELECT google_place_id, image_url FROM places WHERE id = ? AND trip_id = ?'
).get(placeId, tripId) as { google_place_id: string | null; image_url: string | null } | undefined;
if (!place) return false;
db.prepare('DELETE FROM places WHERE id = ?').run(placeId);
// Runs after the delete so the reference check no longer counts this place.
reclaimPhotoCache(place.google_place_id, place.image_url);
return true;
}
/**
 * Delete several places of one trip inside a single db.transaction callback.
 * Ids that do not match a place in the trip are skipped.
 *
 * @param tripId Trip the places must belong to.
 * @param ids    Candidate place ids.
 * @returns The ids that were actually deleted, in input order.
 */
export function deletePlacesMany(tripId: string, ids: number[]): number[] {
if (ids.length === 0) return [];
// NOTE(review): two selectStmt declarations are listed; presumably the first is
// the pre-change line and the second (selecting the photo columns) replaces it.
const selectStmt = db.prepare('SELECT id FROM places WHERE id = ? AND trip_id = ?');
const selectStmt = db.prepare('SELECT google_place_id, image_url FROM places WHERE id = ? AND trip_id = ?');
const deleteStmt = db.prepare('DELETE FROM places WHERE id = ?');
const deleted: number[] = [];
// Photo-identifying columns of every deleted row, collected for cache cleanup.
const reclaimable: { google_place_id: string | null; image_url: string | null }[] = [];
const run = db.transaction((list: number[]) => {
for (const id of list) {
// NOTE(review): as above, the bare existence check is presumably the
// pre-change line; the `row` lookup below supersedes it.
if (!selectStmt.get(id, tripId)) continue;
const row = selectStmt.get(id, tripId) as { google_place_id: string | null; image_url: string | null } | undefined;
if (!row) continue;
deleteStmt.run(id);
deleted.push(id);
reclaimable.push(row);
}
});
run(ids);
// Reclaim after the transaction commits so isReferenced() sees the final place set.
for (const row of reclaimable) reclaimPhotoCache(row.google_place_id, row.image_url);
return deleted;
}