mirror of
https://github.com/mauriceboe/TREK.git
synced 2026-06-19 21:31:46 +00:00
fix(maps): bound place-photo cache growth (Wikimedia + Google) (#1174)
The place-photo cache (uploads/photos/google) grew unbounded: a Wikimedia geosearch path cached full-res originals despite requesting a 400px thumb, the writer applied no size guard, nothing reclaimed orphaned files, and backups archived the whole re-derivable cache verbatim.
- Prefer the scaled `thumburl` over the full-res `info.url` in the Commons geosearch fallback.
- Downscale any cached image to <=800px JPEG via the existing jimp dep, with a safe fallback to the original bytes on decode failure.
- Add sweepOrphans() (orphaned meta rows + stray files) wired into the scheduler (startup + nightly), and removeIfUnreferenced() called on place delete for prompt reclamation.
- Exclude the re-derivable photo/trek caches from backups; restores self-heal as the cache dirs are recreated at startup.
This commit is contained in:
@@ -79,6 +79,7 @@ const onListen = () => {
|
||||
scheduler.startDemoReset();
|
||||
scheduler.startIdempotencyCleanup();
|
||||
scheduler.startTrekPhotoCacheCleanup();
|
||||
scheduler.startPlacePhotoCacheCleanup();
|
||||
scheduler.startAirTrailSync();
|
||||
const { startTokenCleanup } = require('./services/ephemeralTokens');
|
||||
startTokenCleanup();
|
||||
|
||||
+26
-1
@@ -334,6 +334,30 @@ function startTrekPhotoCacheCleanup(): void {
|
||||
});
|
||||
}
|
||||
|
||||
// Place-photo (Google/Wikimedia) cache cleanup: nightly — reclaim cached files and
// meta rows no place references anymore (deleted places/trips, overwritten image_url).
let placePhotoCacheTask: ScheduledTask | null = null;

/**
 * (Re)start the place-photo cache sweeper.
 *
 * Stops any previously scheduled sweeper task, runs one sweep synchronously
 * right away, then schedules the same sweep for 03:30 every day in the timezone
 * named by `TZ` (falling back to UTC when `TZ` is unset or empty).
 */
function startPlacePhotoCacheCleanup(): void {
  if (placePhotoCacheTask) { placePhotoCacheTask.stop(); placePhotoCacheTask = null; }

  const sweep = (): void => {
    try {
      // Resolved lazily on each run rather than at module load.
      const { sweepOrphans } = require('./services/placePhotoCache');
      const removed = sweepOrphans();
      if (removed > 0) logInfo(`Place-photo cache cleanup: removed ${removed} orphaned file(s)/row(s)`);
    } catch (err: unknown) {
      // A failed sweep is logged and otherwise ignored; the next run retries.
      logError(`Place-photo cache cleanup: ${err instanceof Error ? err.message : String(err)}`);
    }
  };

  // Run once on startup to reclaim orphans left over from before this sweeper existed.
  sweep();

  const tz = process.env.TZ || 'UTC';
  placePhotoCacheTask = cron.schedule('30 3 * * *', sweep, { timezone: tz });
}
|
||||
|
||||
// AirTrail sync: poll connected instances on an interval and reconcile linked
|
||||
// flights both ways (#214). The per-tick enable gate (addon + setting) lives in
|
||||
// runAirtrailSync, so toggling the addon takes effect without a restart.
|
||||
@@ -366,7 +390,8 @@ function stop(): void {
|
||||
if (versionCheckTask) { versionCheckTask.stop(); versionCheckTask = null; }
|
||||
if (idempotencyCleanupTask) { idempotencyCleanupTask.stop(); idempotencyCleanupTask = null; }
|
||||
if (trekPhotoCacheTask) { trekPhotoCacheTask.stop(); trekPhotoCacheTask = null; }
|
||||
if (placePhotoCacheTask) { placePhotoCacheTask.stop(); placePhotoCacheTask = null; }
|
||||
if (airtrailSyncTask) { airtrailSyncTask.stop(); airtrailSyncTask = null; }
|
||||
}
|
||||
|
||||
// Public scheduler API, including the place-photo cache sweeper entry point.
export { start, stop, startDemoReset, startTripReminders, startTodoReminders, startVersionCheck, startIdempotencyCleanup, startTrekPhotoCacheCleanup, startPlacePhotoCacheCleanup, startAirTrailSync, loadSettings, saveSettings, VALID_INTERVALS };
|
||||
|
||||
@@ -156,7 +156,14 @@ export async function createBackup(): Promise<BackupInfo> {
|
||||
}
|
||||
|
||||
if (fs.existsSync(uploadsDir)) {
|
||||
archive.directory(uploadsDir, 'uploads');
|
||||
// Exclude the place-photo and trek-memory caches: both are re-derivable
|
||||
// (re-fetched on demand, keyed on stable ids) and would otherwise dominate
|
||||
// backup size. Restores self-heal — the cache dirs are recreated at startup.
|
||||
archive.glob(
|
||||
'**/*',
|
||||
{ cwd: uploadsDir, ignore: ['photos/google/**', 'photos/trek/**'], nodir: true, dot: true },
|
||||
{ prefix: 'uploads' },
|
||||
);
|
||||
}
|
||||
|
||||
archive.finalize();
|
||||
|
||||
@@ -33,7 +33,7 @@ interface OverpassElement {
|
||||
}
|
||||
|
||||
/** Page entry of a Wikimedia Commons API response, limited to the fields read here. */
interface WikiCommonsPage {
  /**
   * Image details for the page. `url` is the full-resolution original; `thumburl`
   * is the scaled rendition Commons also returns when a thumbnail width is requested.
   * `extmetadata.Artist.value` carries the attribution text.
   */
  imageinfo?: { url?: string; thumburl?: string; extmetadata?: { Artist?: { value?: string } } }[];
}
|
||||
|
||||
interface GooglePlaceResult {
|
||||
@@ -537,7 +537,9 @@ export async function fetchWikimediaPhoto(lat: number, lng: number, name?: strin
|
||||
const mime = (info as { mime?: string })?.mime || '';
|
||||
if (info?.url && (mime.startsWith('image/jpeg') || mime.startsWith('image/png'))) {
|
||||
const attribution = info.extmetadata?.Artist?.value?.replace(/<[^>]+>/g, '').trim() || null;
|
||||
return { photoUrl: info.url, attribution };
|
||||
// iiurlwidth=400 makes Commons also return a scaled thumburl. Prefer it —
|
||||
// info.url is the full-resolution original (multi-megapixel camera exports).
|
||||
return { photoUrl: info.thumburl ?? info.url, attribution };
|
||||
}
|
||||
}
|
||||
return null;
|
||||
|
||||
@@ -2,11 +2,20 @@ import path from 'node:path';
|
||||
import fs from 'node:fs';
|
||||
import fsPromises from 'node:fs/promises';
|
||||
import crypto from 'node:crypto';
|
||||
import { Jimp, JimpMime } from 'jimp';
|
||||
import { db } from '../db/database';
|
||||
|
||||
// Directory holding the cached place photos.
// Overridable for tests (mirrors the TREK_DB_FILE seam) so the suite never touches
// the real uploads tree. An unset or empty TREK_PLACE_PHOTO_DIR falls back to the
// default location under uploads/.
const GOOGLE_PHOTO_DIR = process.env.TREK_PLACE_PHOTO_DIR || path.join(__dirname, '../../uploads/photos/google');
|
||||
const ERROR_TTL = 5 * 60 * 1000;
|
||||
|
||||
// Marker photos are displayed tiny — cap stored images so an oversized source
// (e.g. a Wikimedia Commons full-res original) can't bloat the cache. Matches
// THUMB_MAX/THUMB_QUALITY in memories/thumbnailService.ts.
// MAX_DIM is the longest allowed edge in pixels; JPEG_QUALITY is the re-encode quality.
const MAX_DIM = 800;
const JPEG_QUALITY = 80;
|
||||
|
||||
// In-flight dedup — prevents stampedes when multiple requests hit the same uncached placeId simultaneously.
// Maps a place id to the pending fetch; the promise resolves to the cached file path
// plus attribution, or null.
const inFlight = new Map<string, Promise<{ filePath: string; attribution: string | null } | null>>();
|
||||
|
||||
@@ -74,11 +83,27 @@ export function markError(placeId: string): void {
|
||||
).run(placeId, Date.now(), Date.now());
|
||||
}
|
||||
|
||||
// Downscale oversized images to MAX_DIM before caching, re-encoding to JPEG.
// Defense-in-depth: keeps the cache small regardless of what the fetch path hands
// us. Jimp auto-applies EXIF orientation on read. Falls back to the original bytes
// on any failure (corrupt/unsupported format) so behaviour is never worse than before.
//
// Returns the input buffer untouched (no re-encode) when both dimensions are
// already within MAX_DIM; otherwise returns a JPEG scaled to fit a
// MAX_DIM x MAX_DIM box at JPEG_QUALITY. Never rejects.
async function downscale(bytes: Buffer): Promise<Buffer> {
  try {
    const img = await Jimp.read(bytes);
    if (img.bitmap.width <= MAX_DIM && img.bitmap.height <= MAX_DIM) return bytes;
    img.scaleToFit({ w: MAX_DIM, h: MAX_DIM });
    return await img.getBuffer(JimpMime.jpeg, { quality: JPEG_QUALITY });
  } catch {
    // Decode/encode failure: cache what we were given rather than failing the write.
    return bytes;
  }
}
|
||||
|
||||
export async function put(placeId: string, bytes: Buffer, attribution: string | null): Promise<CachedPhoto> {
|
||||
const fp = filePath(placeId);
|
||||
const tmp = fp + '.tmp';
|
||||
|
||||
await fsPromises.writeFile(tmp, bytes);
|
||||
const resized = await downscale(bytes);
|
||||
await fsPromises.writeFile(tmp, resized);
|
||||
await fsPromises.rename(tmp, fp);
|
||||
|
||||
knownOnDisk.add(placeId);
|
||||
@@ -108,3 +133,54 @@ export function serveFilePath(placeId: string): string | null {
|
||||
knownOnDisk.add(placeId);
|
||||
return fp;
|
||||
}
|
||||
|
||||
// A cache entry is "referenced" while any place still points at it — either by the
// Google place_id (the dedup key) or by the stable proxy URL stored in image_url
// (covers coords: pseudo-ids, which never have a google_place_id).
// Returns true as soon as one matching row in `places` exists.
function isReferenced(placeId: string): boolean {
  const row = db.prepare(
    'SELECT 1 FROM places WHERE google_place_id = ? OR image_url = ? LIMIT 1'
  ).get(placeId, proxyUrl(placeId));
  return !!row;
}
|
||||
|
||||
// Remove everything the cache holds for one place id: the file on disk, the meta
// row, and the in-memory "known on disk" marker. Does not check whether the entry
// is still referenced — callers decide that first.
function deleteEntry(placeId: string): void {
  // Best-effort: a failed unlink (typically the file is already gone) must not
  // stop the meta row from being dropped.
  try { fs.unlinkSync(filePath(placeId)); } catch { /* already gone */ }
  db.prepare('DELETE FROM google_place_photo_meta WHERE place_id = ?').run(placeId);
  knownOnDisk.delete(placeId);
}
|
||||
|
||||
// Drop a cache entry if no place references it anymore. Called after a place delete
// for prompt reclamation; the nightly sweep is the catch-all for every other path.
// No-op while at least one place still points at the entry.
export function removeIfUnreferenced(placeId: string): void {
  if (isReferenced(placeId)) return;
  deleteEntry(placeId);
}
|
||||
|
||||
// Reclaim orphaned cache files + meta rows. Runs on startup and nightly (scheduler).
// Two passes: (1) meta rows no place references; (2) stray .jpg files with no meta row.
// Returns the number of removals: one per orphaned meta row plus one per stray file
// actually unlinked.
export function sweepOrphans(): number {
  let removed = 0;

  // Pass 1: walk every meta row; keep the referenced ones, delete the rest.
  const rows = db.prepare('SELECT place_id FROM google_place_photo_meta').all() as { place_id: string }[];
  const keepFiles = new Set<string>();
  for (const { place_id } of rows) {
    if (isReferenced(place_id)) {
      keepFiles.add(cachedFileNameFor(place_id));
    } else {
      deleteEntry(place_id);
      removed++;
    }
  }

  // Pass 2: files on disk that no surviving meta row maps to (e.g. left over from a
  // crash between writeFile and the DB upsert, or a meta row deleted out-of-band).
  let entries: string[] = [];
  try { entries = fs.readdirSync(GOOGLE_PHOTO_DIR); } catch { entries = []; }
  for (const entry of entries) {
    // Only .jpg files are considered; anything else in the directory is left alone.
    if (!entry.endsWith('.jpg') || keepFiles.has(entry)) continue;
    try { fs.unlinkSync(path.join(GOOGLE_PHOTO_DIR, entry)); removed++; } catch { /* race */ }
  }

  return removed;
}

// File name (without directory) the sweep expects for a cached place id:
// the hex SHA-1 of the id followed by ".jpg".
function cachedFileNameFor(placeId: string): string {
  return `${crypto.createHash('sha1').update(placeId).digest('hex')}.jpg`;
}
|
||||
|
||||
@@ -14,6 +14,20 @@ import {
|
||||
type KmlImportSummary,
|
||||
} from './kmlImport';
|
||||
import { enrichImportedPlaces, type EnrichablePlace } from './placeEnrichment';
|
||||
import * as placePhotoCache from './placePhotoCache';
|
||||
|
||||
// Stored proxy URL of a cached marker photo; the capture group is the URI-encoded cache key.
const PLACE_PHOTO_PROXY_URL = /^\/api\/maps\/place-photo\/(.+)\/bytes$/;

// Collect the distinct cache keys a place row may point at: its Google place_id
// and/or the id embedded in a proxy-style image_url. A URL that is not a proxy URL,
// or whose id cannot be URI-decoded, contributes nothing.
function photoCacheKeysFor(googlePlaceId: string | null, imageUrl: string | null): Set<string> {
  const keys = new Set<string>();
  if (googlePlaceId) keys.add(googlePlaceId);
  const encoded = imageUrl?.match(PLACE_PHOTO_PROXY_URL)?.[1];
  if (encoded !== undefined) {
    try { keys.add(decodeURIComponent(encoded)); } catch { /* malformed url */ }
  }
  return keys;
}

// Reclaim a deleted place's cached marker photo if nothing else references it.
// The cache key is the Google place_id, or — for coordinate-only places — the
// pseudo-id embedded in the stored proxy URL (/api/maps/place-photo/{id}/bytes).
// Best-effort: a failure to reclaim one key is ignored and never propagates.
function reclaimPhotoCache(googlePlaceId: string | null, imageUrl: string | null): void {
  for (const id of photoCacheKeysFor(googlePlaceId, imageUrl)) {
    try { placePhotoCache.removeIfUnreferenced(id); } catch { /* best-effort */ }
  }
}
|
||||
|
||||
/** Opt-in Places-API enrichment for list imports (#886). */
|
||||
export interface ListImportOptions {
|
||||
@@ -242,25 +256,33 @@ export function updatePlace(
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Delete one place from a trip and reclaim its cached marker photo.
 *
 * @param tripId  Trip the place must belong to.
 * @param placeId Id of the place to delete.
 * @returns `false` when no such place exists in the trip, `true` after deletion.
 */
export function deletePlace(tripId: string, placeId: string): boolean {
  // Read the cache keys before the row disappears.
  const place = db.prepare(
    'SELECT google_place_id, image_url FROM places WHERE id = ? AND trip_id = ?'
  ).get(placeId, tripId) as { google_place_id: string | null; image_url: string | null } | undefined;
  if (!place) return false;
  db.prepare('DELETE FROM places WHERE id = ?').run(placeId);
  // Reclaim only after the delete, so this row no longer counts as a reference.
  reclaimPhotoCache(place.google_place_id, place.image_url);
  return true;
}
|
||||
|
||||
/**
 * Delete several places of one trip inside a single transaction, then reclaim
 * their cached marker photos.
 *
 * Ids that do not belong to the trip are skipped.
 *
 * @param tripId Trip the places must belong to.
 * @param ids    Place ids to delete.
 * @returns The ids that were actually deleted, in input order.
 */
export function deletePlacesMany(tripId: string, ids: number[]): number[] {
  if (ids.length === 0) return [];
  const selectStmt = db.prepare('SELECT google_place_id, image_url FROM places WHERE id = ? AND trip_id = ?');
  const deleteStmt = db.prepare('DELETE FROM places WHERE id = ?');
  const deleted: number[] = [];
  const reclaimable: { google_place_id: string | null; image_url: string | null }[] = [];
  const run = db.transaction((list: number[]) => {
    for (const id of list) {
      // One lookup serves both as the ownership check and as the source of cache keys.
      const row = selectStmt.get(id, tripId) as { google_place_id: string | null; image_url: string | null } | undefined;
      if (!row) continue;
      deleteStmt.run(id);
      deleted.push(id);
      reclaimable.push(row);
    }
  });
  run(ids);
  // Reclaim after the transaction commits so isReferenced() sees the final place set.
  for (const row of reclaimable) reclaimPhotoCache(row.google_place_id, row.image_url);
  return deleted;
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user