From 574c54c16c68e31ada97ce304b97caca7df77c34 Mon Sep 17 00:00:00 2001 From: Maurice Date: Fri, 26 Jun 2026 16:08:32 +0200 Subject: [PATCH] perf(extract): cap single-booking text tighter; require rental company A long single-booking PDF (e.g. an 11-page rental voucher) spent ~200s on CPU prompt-eval at the 16k cap, though its data sits in the first ~2k. Cap non-flight docs at 6k (flights keep 16k for all legs). Also make the rental operator a required field so the car gets a real title. --- server/src/nest/llm-parse/llm-parse.service.ts | 12 +++++++----- server/src/nest/llm-parse/router/flat-schemas.ts | 4 +++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/server/src/nest/llm-parse/llm-parse.service.ts b/server/src/nest/llm-parse/llm-parse.service.ts index 7644fdf5..322ea08b 100644 --- a/server/src/nest/llm-parse/llm-parse.service.ts +++ b/server/src/nest/llm-parse/llm-parse.service.ts @@ -4,7 +4,7 @@ import { resolveLlmConfig } from './llm-config.resolver'; import { buildSystemPrompt, KI_RESERVATION_JSON_SCHEMA } from './llm-prompt'; import type { LlmExtractionInput } from './llm-provider.interface'; import { isPdf, extractText } from './text-extract'; -import { routeExtraction } from './router/extraction-router'; +import { routeExtraction, detectFlightNumbers } from './router/extraction-router'; import { Injectable } from '@nestjs/common'; import { kiReservationSchema } from '@trek/shared'; @@ -55,10 +55,12 @@ export class LlmParseService { ); } else { input.text = await extractText(file.buffer, file.originalName); - // The local router decomposes the document and extracts one reservation at a - // time, so it tolerates more text than the single-shot path (which had to cap - // at 4000 to fit a small context). Cloud single-shot keeps the tight cap. - const MAX_EXTRACT_CHARS = config.provider === 'local' ? 16000 : 4000; + // Cap the text fed to the model. 
A flight itinerary lists its legs throughout a long + // document, so it keeps a generous window; a single booking has the essentials up top, + // so cap it tighter to keep CPU prompt-eval fast (an 11-page rental voucher was ~200s at + // 16k, though the booking data sits in the first ~2k). Cloud single-shot keeps the tight cap. + const MAX_EXTRACT_CHARS = + config.provider !== 'local' ? 4000 : detectFlightNumbers(input.text).length > 0 ? 16000 : 6000; if (input.text.length > MAX_EXTRACT_CHARS) input.text = input.text.slice(0, MAX_EXTRACT_CHARS); console.debug(`[DEBUG] Extracted text from ${file.originalName} (${input.text.length} chars):\n`, input.text); if (!input.text.trim()) { diff --git a/server/src/nest/llm-parse/router/flat-schemas.ts b/server/src/nest/llm-parse/router/flat-schemas.ts index 0f8f3a85..877e5b31 100644 --- a/server/src/nest/llm-parse/router/flat-schemas.ts +++ b/server/src/nest/llm-parse/router/flat-schemas.ts @@ -57,7 +57,9 @@ export const FLAT_SCHEMA_BY_TYPE: Record = { ), car: flat( ['booking_reference', 'operator', 'name', 'from_name', 'to_name', 'departure_time', 'arrival_time', 'price', 'currency'], - ['from_name', 'departure_time', 'arrival_time'], + // `operator` (rental company) is REQUIRED so the booking gets a real title instead of the + // generic "Rental Car" fallback. + ['operator', 'from_name', 'departure_time', 'arrival_time'], ), hotel: flat( ['name', 'booking_reference', 'address', 'checkin_time', 'checkout_time', 'telephone', 'website', 'price', 'currency'],