fix(maps): bound place-photo cache growth (Wikimedia + Google) (#1174)

The place-photo cache (uploads/photos/google) grew unbounded: a Wikimedia
geosearch path cached full-res originals despite requesting a 400px thumb,
the writer applied no size guard, nothing reclaimed orphaned files, and
backups archived the whole re-derivable cache verbatim.

- Prefer the scaled `thumburl` over the full-res `info.url` in the Commons
  geosearch fallback.
- Downscale any cached image to <=800px JPEG via the existing jimp dep,
  with a safe fallback to the original bytes on decode failure.
- Add sweepOrphans() (orphaned meta rows + stray files) wired into the
  scheduler (startup + nightly), and removeIfUnreferenced() called on
  place delete for prompt reclamation.
- Exclude the re-derivable photo/trek caches from backups; restores
  self-heal as the cache dirs are recreated at startup.
This commit is contained in:
jubnl
2026-06-14 23:31:02 +02:00
committed by GitHub
parent 3e9626fce9
commit 8077ffab34
10 changed files with 349 additions and 13 deletions
+1
View File
@@ -79,6 +79,7 @@ const onListen = () => {
scheduler.startDemoReset();
scheduler.startIdempotencyCleanup();
scheduler.startTrekPhotoCacheCleanup();
scheduler.startPlacePhotoCacheCleanup();
scheduler.startAirTrailSync();
const { startTokenCleanup } = require('./services/ephemeralTokens');
startTokenCleanup();
+26 -1
View File
@@ -334,6 +334,30 @@ function startTrekPhotoCacheCleanup(): void {
});
}
// Place-photo (Google/Wikimedia) cache cleanup: nightly — reclaim cached files and
// meta rows no place references anymore (deleted places/trips, overwritten image_url).
let placePhotoCacheTask: ScheduledTask | null = null;
function startPlacePhotoCacheCleanup(): void {
if (placePhotoCacheTask) { placePhotoCacheTask.stop(); placePhotoCacheTask = null; }
const sweep = () => {
try {
const { sweepOrphans } = require('./services/placePhotoCache');
const removed = sweepOrphans();
if (removed > 0) logInfo(`Place-photo cache cleanup: removed ${removed} orphaned file(s)/row(s)`);
} catch (err: unknown) {
logError(`Place-photo cache cleanup: ${err instanceof Error ? err.message : err}`);
}
};
// Run once on startup to reclaim orphans left over from before this sweeper existed.
sweep();
const tz = process.env.TZ || 'UTC';
placePhotoCacheTask = cron.schedule('30 3 * * *', sweep, { timezone: tz });
}
// AirTrail sync: poll connected instances on an interval and reconcile linked
// flights both ways (#214). The per-tick enable gate (addon + setting) lives in
// runAirtrailSync, so toggling the addon takes effect without a restart.
@@ -366,7 +390,8 @@ function stop(): void {
if (versionCheckTask) { versionCheckTask.stop(); versionCheckTask = null; }
if (idempotencyCleanupTask) { idempotencyCleanupTask.stop(); idempotencyCleanupTask = null; }
if (trekPhotoCacheTask) { trekPhotoCacheTask.stop(); trekPhotoCacheTask = null; }
if (placePhotoCacheTask) { placePhotoCacheTask.stop(); placePhotoCacheTask = null; }
if (airtrailSyncTask) { airtrailSyncTask.stop(); airtrailSyncTask = null; }
}
export { start, stop, startDemoReset, startTripReminders, startTodoReminders, startVersionCheck, startIdempotencyCleanup, startTrekPhotoCacheCleanup, startAirTrailSync, loadSettings, saveSettings, VALID_INTERVALS };
export { start, stop, startDemoReset, startTripReminders, startTodoReminders, startVersionCheck, startIdempotencyCleanup, startTrekPhotoCacheCleanup, startPlacePhotoCacheCleanup, startAirTrailSync, loadSettings, saveSettings, VALID_INTERVALS };
+8 -1
View File
@@ -156,7 +156,14 @@ export async function createBackup(): Promise<BackupInfo> {
}
if (fs.existsSync(uploadsDir)) {
archive.directory(uploadsDir, 'uploads');
// Exclude the place-photo and trek-memory caches: both are re-derivable
// (re-fetched on demand, keyed on stable ids) and would otherwise dominate
// backup size. Restores self-heal — the cache dirs are recreated at startup.
archive.glob(
'**/*',
{ cwd: uploadsDir, ignore: ['photos/google/**', 'photos/trek/**'], nodir: true, dot: true },
{ prefix: 'uploads' },
);
}
archive.finalize();
+4 -2
View File
@@ -33,7 +33,7 @@ interface OverpassElement {
}
interface WikiCommonsPage {
imageinfo?: { url?: string; extmetadata?: { Artist?: { value?: string } } }[];
imageinfo?: { url?: string; thumburl?: string; extmetadata?: { Artist?: { value?: string } } }[];
}
interface GooglePlaceResult {
@@ -537,7 +537,9 @@ export async function fetchWikimediaPhoto(lat: number, lng: number, name?: strin
const mime = (info as { mime?: string })?.mime || '';
if (info?.url && (mime.startsWith('image/jpeg') || mime.startsWith('image/png'))) {
const attribution = info.extmetadata?.Artist?.value?.replace(/<[^>]+>/g, '').trim() || null;
return { photoUrl: info.url, attribution };
// iiurlwidth=400 makes Commons also return a scaled thumburl. Prefer it —
// info.url is the full-resolution original (multi-megapixel camera exports).
return { photoUrl: info.thumburl ?? info.url, attribution };
}
}
return null;
+78 -2
View File
@@ -2,11 +2,20 @@ import path from 'node:path';
import fs from 'node:fs';
import fsPromises from 'node:fs/promises';
import crypto from 'node:crypto';
import { Jimp, JimpMime } from 'jimp';
import { db } from '../db/database';
const GOOGLE_PHOTO_DIR = path.join(__dirname, '../../uploads/photos/google');
// Overridable for tests (mirrors the TREK_DB_FILE seam) so the suite never touches
// the real uploads tree.
const GOOGLE_PHOTO_DIR = process.env.TREK_PLACE_PHOTO_DIR || path.join(__dirname, '../../uploads/photos/google');
const ERROR_TTL = 5 * 60 * 1000;
// Marker photos are displayed tiny — cap stored images so an oversized source
// (e.g. a Wikimedia Commons full-res original) can't bloat the cache. Matches
// THUMB_MAX/THUMB_QUALITY in memories/thumbnailService.ts.
const MAX_DIM = 800;
const JPEG_QUALITY = 80;
// In-flight dedup — prevents stampedes when multiple requests hit the same uncached placeId simultaneously
const inFlight = new Map<string, Promise<{ filePath: string; attribution: string | null } | null>>();
@@ -74,11 +83,27 @@ export function markError(placeId: string): void {
).run(placeId, Date.now(), Date.now());
}
// Downscale oversized images to MAX_DIM before caching, re-encoding to JPEG.
// Defense-in-depth: keeps the cache small regardless of what the fetch path hands
// us. Jimp auto-applies EXIF orientation on read. Falls back to the original bytes
// on any failure (corrupt/unsupported format) so behaviour is never worse than before.
async function downscale(bytes: Buffer): Promise<Buffer> {
try {
const img = await Jimp.read(bytes);
if (img.bitmap.width <= MAX_DIM && img.bitmap.height <= MAX_DIM) return bytes;
img.scaleToFit({ w: MAX_DIM, h: MAX_DIM });
return await img.getBuffer(JimpMime.jpeg, { quality: JPEG_QUALITY });
} catch {
return bytes;
}
}
export async function put(placeId: string, bytes: Buffer, attribution: string | null): Promise<CachedPhoto> {
const fp = filePath(placeId);
const tmp = fp + '.tmp';
await fsPromises.writeFile(tmp, bytes);
const resized = await downscale(bytes);
await fsPromises.writeFile(tmp, resized);
await fsPromises.rename(tmp, fp);
knownOnDisk.add(placeId);
@@ -108,3 +133,54 @@ export function serveFilePath(placeId: string): string | null {
knownOnDisk.add(placeId);
return fp;
}
// A cache entry is "referenced" while any place still points at it — either by the
// Google place_id (the dedup key) or by the stable proxy URL stored in image_url
// (covers coords: pseudo-ids, which never have a google_place_id).
function isReferenced(placeId: string): boolean {
const row = db.prepare(
'SELECT 1 FROM places WHERE google_place_id = ? OR image_url = ? LIMIT 1'
).get(placeId, proxyUrl(placeId));
return !!row;
}
function deleteEntry(placeId: string): void {
try { fs.unlinkSync(filePath(placeId)); } catch { /* already gone */ }
db.prepare('DELETE FROM google_place_photo_meta WHERE place_id = ?').run(placeId);
knownOnDisk.delete(placeId);
}
// Drop a cache entry if no place references it anymore. Called after a place delete
// for prompt reclamation; the nightly sweep is the catch-all for every other path.
export function removeIfUnreferenced(placeId: string): void {
if (isReferenced(placeId)) return;
deleteEntry(placeId);
}
// Reclaim orphaned cache files + meta rows. Runs on startup and nightly (scheduler).
// Two passes: (1) meta rows no place references; (2) stray .jpg files with no meta row.
export function sweepOrphans(): number {
let removed = 0;
const rows = db.prepare('SELECT place_id FROM google_place_photo_meta').all() as { place_id: string }[];
const keepFiles = new Set<string>();
for (const { place_id } of rows) {
if (isReferenced(place_id)) {
keepFiles.add(`${crypto.createHash('sha1').update(place_id).digest('hex')}.jpg`);
} else {
deleteEntry(place_id);
removed++;
}
}
// Pass 2: files on disk that no surviving meta row maps to (e.g. left over from a
// crash between writeFile and the DB upsert, or a meta row deleted out-of-band).
let entries: string[] = [];
try { entries = fs.readdirSync(GOOGLE_PHOTO_DIR); } catch { entries = []; }
for (const entry of entries) {
if (!entry.endsWith('.jpg') || keepFiles.has(entry)) continue;
try { fs.unlinkSync(path.join(GOOGLE_PHOTO_DIR, entry)); removed++; } catch { /* race */ }
}
return removed;
}
+25 -3
View File
@@ -14,6 +14,20 @@ import {
type KmlImportSummary,
} from './kmlImport';
import { enrichImportedPlaces, type EnrichablePlace } from './placeEnrichment';
import * as placePhotoCache from './placePhotoCache';
// Reclaim a deleted place's cached marker photo if nothing else references it.
// The cache key is the Google place_id, or — for coordinate-only places — the
// pseudo-id embedded in the stored proxy URL (/api/maps/place-photo/{id}/bytes).
function reclaimPhotoCache(googlePlaceId: string | null, imageUrl: string | null): void {
const candidates = new Set<string>();
if (googlePlaceId) candidates.add(googlePlaceId);
const m = imageUrl?.match(/^\/api\/maps\/place-photo\/(.+)\/bytes$/);
if (m) { try { candidates.add(decodeURIComponent(m[1])); } catch { /* malformed url */ } }
for (const id of candidates) {
try { placePhotoCache.removeIfUnreferenced(id); } catch { /* best-effort */ }
}
}
/** Opt-in Places-API enrichment for list imports (#886). */
export interface ListImportOptions {
@@ -242,25 +256,33 @@ export function updatePlace(
// ---------------------------------------------------------------------------
export function deletePlace(tripId: string, placeId: string): boolean {
const place = db.prepare('SELECT id FROM places WHERE id = ? AND trip_id = ?').get(placeId, tripId);
const place = db.prepare(
'SELECT google_place_id, image_url FROM places WHERE id = ? AND trip_id = ?'
).get(placeId, tripId) as { google_place_id: string | null; image_url: string | null } | undefined;
if (!place) return false;
db.prepare('DELETE FROM places WHERE id = ?').run(placeId);
reclaimPhotoCache(place.google_place_id, place.image_url);
return true;
}
export function deletePlacesMany(tripId: string, ids: number[]): number[] {
if (ids.length === 0) return [];
const selectStmt = db.prepare('SELECT id FROM places WHERE id = ? AND trip_id = ?');
const selectStmt = db.prepare('SELECT google_place_id, image_url FROM places WHERE id = ? AND trip_id = ?');
const deleteStmt = db.prepare('DELETE FROM places WHERE id = ?');
const deleted: number[] = [];
const reclaimable: { google_place_id: string | null; image_url: string | null }[] = [];
const run = db.transaction((list: number[]) => {
for (const id of list) {
if (!selectStmt.get(id, tripId)) continue;
const row = selectStmt.get(id, tripId) as { google_place_id: string | null; image_url: string | null } | undefined;
if (!row) continue;
deleteStmt.run(id);
deleted.push(id);
reclaimable.push(row);
}
});
run(ids);
// Reclaim after the transaction commits so isReferenced() sees the final place set.
for (const row of reclaimable) reclaimPhotoCache(row.google_place_id, row.image_url);
return deleted;
}
@@ -25,6 +25,7 @@ const archiverInstanceMock = vi.hoisted(() => ({
pipe: vi.fn(),
file: vi.fn(),
directory: vi.fn(),
glob: vi.fn(),
finalize: vi.fn(),
on: vi.fn(),
}));
@@ -441,7 +442,7 @@ describe('BACKUP-036 createBackup', () => {
);
});
it('BACKUP-036e — includes uploads directory when it exists', async () => {
it('BACKUP-036e — includes uploads but excludes the re-derivable photo caches', async () => {
fsMock.existsSync.mockImplementation((p: string) => {
if (String(p).endsWith('uploads')) return true;
return false;
@@ -467,10 +468,16 @@ describe('BACKUP-036 createBackup', () => {
await createBackup();
expect(archiverInstanceMock.directory).toHaveBeenCalledWith(
expect.stringContaining('uploads'),
'uploads'
expect(archiverInstanceMock.glob).toHaveBeenCalledWith(
'**/*',
expect.objectContaining({
cwd: expect.stringContaining('uploads'),
ignore: ['photos/google/**', 'photos/trek/**'],
}),
{ prefix: 'uploads' },
);
// The re-derivable caches must not be archived verbatim.
expect(archiverInstanceMock.directory).not.toHaveBeenCalled();
});
});
@@ -538,6 +538,31 @@ describe('fetchWikimediaPhoto (fetch stubbed)', () => {
expect(result!.attribution).toBe('Alice');
});
it('MAPS-036b: geosearch prefers the scaled thumburl over the full-res original', async () => {
const wikiResponse = { ok: true, json: async () => ({ query: { pages: { '-1': {} } } }) };
const commonsResponse = {
ok: true,
json: async () => ({
query: { pages: { '1': {
imageinfo: [{
url: 'https://commons.org/original-16mb.jpg',
thumburl: 'https://commons.org/thumb-400.jpg',
mime: 'image/jpeg',
extmetadata: { Artist: { value: 'Alice' } },
}],
} } },
}),
};
vi.stubGlobal('fetch', vi.fn()
.mockResolvedValueOnce(wikiResponse)
.mockResolvedValueOnce(commonsResponse));
const { fetchWikimediaPhoto } = await import('../../../src/services/mapsService');
const result = await fetchWikimediaPhoto(48.8, 2.3, 'Some Place');
expect(result).toBeDefined();
expect(result!.photoUrl).toBe('https://commons.org/thumb-400.jpg');
expect(result!.attribution).toBe('Alice');
});
it('MAPS-037: returns null when both strategies find nothing', async () => {
vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
ok: true,
@@ -0,0 +1,151 @@
/**
* Unit tests for placePhotoCache — PPC-001 through PPC-010.
* Covers the downscale guard in put(), removeIfUnreferenced(), and sweepOrphans().
* Uses a real in-memory SQLite DB and a throwaway temp upload dir
* (TREK_PLACE_PHOTO_DIR) so the real uploads tree is never touched.
*/
import { describe, it, expect, beforeAll, beforeEach, afterAll, vi } from 'vitest';
import path from 'node:path';
import fs from 'node:fs';
import os from 'node:os';
import crypto from 'node:crypto';
import { Jimp, JimpMime } from 'jimp';
import Database from 'better-sqlite3';
// Throwaway upload dir — set before importing the module under test (it reads the
// env at load time and mkdirs the dir).
const TMP_DIR = fs.mkdtempSync(path.join(os.tmpdir(), 'ppc-'));
process.env.TREK_PLACE_PHOTO_DIR = TMP_DIR;
// Minimal real DB with just the two tables placePhotoCache touches.
const testDb = new Database(':memory:');
testDb.exec(`
CREATE TABLE places (
id INTEGER PRIMARY KEY AUTOINCREMENT,
google_place_id TEXT,
image_url TEXT
);
CREATE TABLE google_place_photo_meta (
place_id TEXT PRIMARY KEY,
attribution TEXT,
fetched_at INTEGER NOT NULL,
error_at INTEGER
);
`);
vi.mock('../../../src/db/database', () => ({ db: testDb }));
function filePathFor(placeId: string): string {
const hash = crypto.createHash('sha1').update(placeId).digest('hex');
return path.join(TMP_DIR, `${hash}.jpg`);
}
async function makeJpeg(width: number, height: number): Promise<Buffer> {
const img = new Jimp({ width, height, color: 0xff0000ff });
return img.getBuffer(JimpMime.jpeg, { quality: 80 });
}
let cache: typeof import('../../../src/services/placePhotoCache');
beforeAll(async () => {
cache = await import('../../../src/services/placePhotoCache');
});
beforeEach(() => {
testDb.exec('DELETE FROM places; DELETE FROM google_place_photo_meta;');
for (const f of fs.readdirSync(TMP_DIR)) fs.rmSync(path.join(TMP_DIR, f), { force: true });
});
afterAll(() => {
testDb.close();
fs.rmSync(TMP_DIR, { recursive: true, force: true });
});
describe('placePhotoCache.put() downscale guard', () => {
it('PPC-001: downscales an oversized image to <= 800px', async () => {
const big = await makeJpeg(1600, 1200);
await cache.put('big-place', big, 'Alice');
const written = fs.readFileSync(filePathFor('big-place'));
const decoded = await Jimp.read(written);
expect(Math.max(decoded.bitmap.width, decoded.bitmap.height)).toBeLessThanOrEqual(800);
expect(written.length).toBeLessThan(big.length);
});
it('PPC-002: passes a small image through unchanged', async () => {
const small = await makeJpeg(100, 100);
await cache.put('small-place', small, null);
const written = fs.readFileSync(filePathFor('small-place'));
expect(written.equals(small)).toBe(true);
});
it('PPC-003: falls back to original bytes when the input is not a decodable image', async () => {
const garbage = Buffer.from('definitely not an image');
await cache.put('garbage-place', garbage, null);
const written = fs.readFileSync(filePathFor('garbage-place'));
expect(written.equals(garbage)).toBe(true);
});
});
describe('placePhotoCache.removeIfUnreferenced()', () => {
it('PPC-004: removes a cache entry that no place references', async () => {
await cache.put('orphan', await makeJpeg(50, 50), null);
expect(fs.existsSync(filePathFor('orphan'))).toBe(true);
cache.removeIfUnreferenced('orphan');
expect(fs.existsSync(filePathFor('orphan'))).toBe(false);
expect(testDb.prepare('SELECT 1 FROM google_place_photo_meta WHERE place_id = ?').get('orphan')).toBeUndefined();
});
it('PPC-005: keeps an entry still referenced by google_place_id', async () => {
await cache.put('gid-1', await makeJpeg(50, 50), null);
testDb.prepare('INSERT INTO places (google_place_id) VALUES (?)').run('gid-1');
cache.removeIfUnreferenced('gid-1');
expect(fs.existsSync(filePathFor('gid-1'))).toBe(true);
});
it('PPC-006: keeps an entry referenced by a coords proxy URL in image_url', async () => {
const id = 'coords:48.8:2.3';
await cache.put(id, await makeJpeg(50, 50), null);
const proxy = `/api/maps/place-photo/${encodeURIComponent(id)}/bytes`;
testDb.prepare('INSERT INTO places (image_url) VALUES (?)').run(proxy);
cache.removeIfUnreferenced(id);
expect(fs.existsSync(filePathFor(id))).toBe(true);
});
});
describe('placePhotoCache.sweepOrphans()', () => {
it('PPC-007: removes orphaned meta rows + files, keeps referenced ones, deletes stray files', async () => {
await cache.put('keep-gid', await makeJpeg(50, 50), null);
await cache.put('drop-me', await makeJpeg(50, 50), null);
testDb.prepare('INSERT INTO places (google_place_id) VALUES (?)').run('keep-gid');
// A stray .jpg on disk with no meta row (e.g. a crash between write and upsert).
const strayPath = path.join(TMP_DIR, 'deadbeef'.padEnd(40, '0') + '.jpg');
fs.writeFileSync(strayPath, 'stray');
const removed = cache.sweepOrphans();
expect(fs.existsSync(filePathFor('keep-gid'))).toBe(true);
expect(fs.existsSync(filePathFor('drop-me'))).toBe(false);
expect(fs.existsSync(strayPath)).toBe(false);
expect(testDb.prepare('SELECT 1 FROM google_place_photo_meta WHERE place_id = ?').get('drop-me')).toBeUndefined();
expect(testDb.prepare('SELECT 1 FROM google_place_photo_meta WHERE place_id = ?').get('keep-gid')).toBeDefined();
expect(removed).toBe(2); // drop-me (orphan meta+file) + stray file
});
it('PPC-008: returns 0 when every entry is referenced', async () => {
await cache.put('ref-a', await makeJpeg(50, 50), null);
testDb.prepare('INSERT INTO places (google_place_id) VALUES (?)').run('ref-a');
expect(cache.sweepOrphans()).toBe(0);
expect(fs.existsSync(filePathFor('ref-a'))).toBe(true);
});
});
@@ -41,6 +41,14 @@ vi.mock('../../../src/config', () => ({
updateJwtSecret: () => {},
}));
// Spy on the photo-cache reclaim hook so delete tests assert the wiring without
// touching disk. The removal logic itself is covered in placePhotoCache.test.ts.
const { removeIfUnreferencedSpy } = vi.hoisted(() => ({ removeIfUnreferencedSpy: vi.fn() }));
vi.mock('../../../src/services/placePhotoCache', async (importOriginal) => ({
...(await importOriginal<typeof import('../../../src/services/placePhotoCache')>()),
removeIfUnreferenced: removeIfUnreferencedSpy,
}));
import { createTables } from '../../../src/db/schema';
import { runMigrations } from '../../../src/db/migrations';
import { resetTestDb } from '../../helpers/test-db';
@@ -252,6 +260,18 @@ describe('deletePlace', () => {
expect(remaining).toHaveLength(1);
expect(remaining[0].id).toBe(p1.id);
});
it('PLACE-SVC-019b — reclaims the photo cache for the deleted place', () => {
removeIfUnreferencedSpy.mockClear();
const { user } = createUser(testDb);
const trip = createTrip(testDb, user.id);
const place = createPlace(testDb, trip.id, { name: 'With Photo' }) as any;
testDb.prepare('UPDATE places SET google_place_id = ? WHERE id = ?').run('ChIJgid', place.id);
deletePlace(String(trip.id), String(place.id));
expect(removeIfUnreferencedSpy).toHaveBeenCalledWith('ChIJgid');
});
});
// ── importGpx ─────────────────────────────────────────────────────────────────