diff --git a/client/src/api/client.ts b/client/src/api/client.ts
index 9acfec4a..eed2b54d 100644
--- a/client/src/api/client.ts
+++ b/client/src/api/client.ts
@@ -664,7 +664,9 @@ export const reservationsApi = {
     const fd = new FormData()
     for (const f of files) fd.append('files', f)
     fd.append('mode', mode)
-    return apiClient.post(`/trips/${tripId}/reservations/import/booking`, fd, { headers: { 'Content-Type': 'multipart/form-data' } }).then(r => r.data)
+    // No client-side timeout: kitinerary + LLM extraction routinely exceeds the
+    // global 8s default (a cold local model alone can take ~45s).
+    return apiClient.post(`/trips/${tripId}/reservations/import/booking`, fd, { headers: { 'Content-Type': 'multipart/form-data' }, timeout: 0 }).then(r => r.data)
   },
   importBookingConfirm: (tripId: number | string, items: BookingImportPreviewItem[]): Promise<BookingImportConfirmResponse> =>
     apiClient.post(`/trips/${tripId}/reservations/import/booking/confirm`, { items }).then(r => r.data),
diff --git a/server/src/nest/llm-parse/clients/openai-compatible.client.ts b/server/src/nest/llm-parse/clients/openai-compatible.client.ts
index c7637214..376eabc4 100644
--- a/server/src/nest/llm-parse/clients/openai-compatible.client.ts
+++ b/server/src/nest/llm-parse/clients/openai-compatible.client.ts
@@ -1,6 +1,8 @@
 import type { LlmExtractionClient, LlmExtractionInput } from '../llm-provider.interface';
 
-const TIMEOUT_MS = 60_000;
+// Generous: a local model (Ollama) may cold-load several GB before its first
+// token, and longer documents push inference past a minute.
+const TIMEOUT_MS = 180_000;
 const MAX_TOKENS = 4096;
 
 /**
diff --git a/server/src/nest/llm-parse/llm-parse.service.ts b/server/src/nest/llm-parse/llm-parse.service.ts
index 42c21164..00db8b1c 100644
--- a/server/src/nest/llm-parse/llm-parse.service.ts
+++ b/server/src/nest/llm-parse/llm-parse.service.ts
@@ -54,6 +54,12 @@ export class LlmParseService {
         );
       } else {
         input.text = await extractText(file.buffer, file.originalName);
+        // Booking details sit at the top of a confirmation. Left uncapped, multi-page
+        // T&C tails (rental/insurance docs run 30k+ chars) overflow the model's
+        // context window, so the model loses the *relevant* head, and they balloon
+        // CPU inference time. Cap the text so only the useful head reaches the LLM.
+        const MAX_EXTRACT_CHARS = 8000;
+        if (input.text.length > MAX_EXTRACT_CHARS) input.text = input.text.slice(0, MAX_EXTRACT_CHARS);
         console.debug(`[DEBUG] Extracted text from ${file.originalName} (${input.text.length} chars):\n`, input.text);
         if (!input.text.trim()) {
           return {