diff --git a/client/src/api/client.ts b/client/src/api/client.ts index 9acfec4a..eed2b54d 100644 --- a/client/src/api/client.ts +++ b/client/src/api/client.ts @@ -664,7 +664,9 @@ export const reservationsApi = { const fd = new FormData() for (const f of files) fd.append('files', f) fd.append('mode', mode) - return apiClient.post(`/trips/${tripId}/reservations/import/booking`, fd, { headers: { 'Content-Type': 'multipart/form-data' } }).then(r => r.data) + // No client-side timeout: kitinerary + LLM extraction routinely exceeds the + // global 8s default (a cold local model alone can take ~45s). + return apiClient.post(`/trips/${tripId}/reservations/import/booking`, fd, { headers: { 'Content-Type': 'multipart/form-data' }, timeout: 0 }).then(r => r.data) }, importBookingConfirm: (tripId: number | string, items: BookingImportPreviewItem[]): Promise => apiClient.post(`/trips/${tripId}/reservations/import/booking/confirm`, { items }).then(r => r.data), diff --git a/server/src/nest/llm-parse/clients/openai-compatible.client.ts b/server/src/nest/llm-parse/clients/openai-compatible.client.ts index c7637214..376eabc4 100644 --- a/server/src/nest/llm-parse/clients/openai-compatible.client.ts +++ b/server/src/nest/llm-parse/clients/openai-compatible.client.ts @@ -1,6 +1,8 @@ import type { LlmExtractionClient, LlmExtractionInput } from '../llm-provider.interface'; -const TIMEOUT_MS = 60_000; +// Generous: a local model (Ollama) may cold-load several GB before its first +// token, and longer documents push inference past a minute. +const TIMEOUT_MS = 180_000; const MAX_TOKENS = 4096; /** diff --git a/server/src/nest/llm-parse/llm-parse.service.ts b/server/src/nest/llm-parse/llm-parse.service.ts index 42c21164..00db8b1c 100644 --- a/server/src/nest/llm-parse/llm-parse.service.ts +++ b/server/src/nest/llm-parse/llm-parse.service.ts @@ -54,6 +54,12 @@ export class LlmParseService { ); } else { input.text = await extractText(file.buffer, file.originalName); + // Booking details sit at the top of a confirmation; multi-page T&C tails + // (rental/insurance docs run 30k+ chars) otherwise overflow the model's + // context window — truncating the *relevant* head — and balloon CPU + // inference time. Cap the text so only the useful head reaches the LLM. + const MAX_EXTRACT_CHARS = 8000; + if (input.text.length > MAX_EXTRACT_CHARS) input.text = input.text.slice(0, MAX_EXTRACT_CHARS); console.debug(`[DEBUG] Extracted text from ${file.originalName} (${input.text.length} chars):\n`, input.text); if (!input.text.trim()) { return {