mirror of
https://github.com/mauriceboe/TREK.git
synced 2026-06-30 18:46:00 +00:00
feat(extract): extract data using LLM
This commit is contained in:
@@ -42,6 +42,7 @@
|
||||
"node-cron": "^4.2.1",
|
||||
"nodemailer": "^9.0.1",
|
||||
"otplib": "^12.0.1",
|
||||
"pdf-parse": "^2.4.5",
|
||||
"qrcode": "^1.5.4",
|
||||
"reflect-metadata": "^0.2.2",
|
||||
"rxjs": "^7.8.2",
|
||||
|
||||
@@ -8,6 +8,7 @@ export const ADDON_IDS = {
|
||||
COLLAB: 'collab',
|
||||
JOURNEY: 'journey',
|
||||
AIRTRAIL: 'airtrail',
|
||||
LLM_PARSING: 'llm_parsing',
|
||||
} as const;
|
||||
|
||||
export type AddonId = typeof ADDON_IDS[keyof typeof ADDON_IDS];
|
||||
|
||||
@@ -104,6 +104,7 @@ function seedAddons(db: Database.Database): void {
|
||||
{ id: 'collab', name: 'Collab', description: 'Notes, polls, and live chat for trip collaboration', type: 'trip', icon: 'Users', enabled: 1, sort_order: 6 },
|
||||
{ id: 'journey', name: 'Journey', description: 'Trip tracking & travel journal — check-ins, photos, daily stories', type: 'global', icon: 'Compass', enabled: 0, sort_order: 35 },
|
||||
{ id: 'airtrail', name: 'AirTrail', description: 'Sync flights from your self-hosted AirTrail instance', type: 'integration', icon: 'Plane', enabled: 0, sort_order: 14 },
|
||||
{ id: 'llm_parsing', name: 'AI Parsing', description: 'LLM fallback for booking imports kitinerary cannot read', type: 'integration', icon: 'Sparkles', enabled: 0, sort_order: 15 },
|
||||
];
|
||||
const insertAddon = db.prepare('INSERT OR IGNORE INTO addons (id, name, description, type, icon, enabled, sort_order) VALUES (?, ?, ?, ?, ?, ?, ?)');
|
||||
for (const a of defaultAddons) insertAddon.run(a.id, a.name, a.description, a.type, a.icon, a.enabled, a.sort_order);
|
||||
|
||||
@@ -15,7 +15,8 @@ import type { User } from '../../types';
|
||||
import { JwtAuthGuard } from '../auth/jwt-auth.guard';
|
||||
import { CurrentUser } from '../auth/current-user.decorator';
|
||||
import { BookingImportService } from './booking-import.service';
|
||||
import type { BookingImportPreviewItem, BookingImportPreviewResponse, BookingImportConfirmResponse } from '@trek/shared';
|
||||
import { bookingImportModeSchema } from '@trek/shared';
|
||||
import type { BookingImportPreviewItem, BookingImportPreviewResponse, BookingImportConfirmResponse, BookingImportMode } from '@trek/shared';
|
||||
|
||||
const ACCEPTED_EXTS = new Set(['.eml', '.pdf', '.pkpass', '.html', '.htm', '.txt']);
|
||||
const MAX_FILE_BYTES = 10 * 1024 * 1024;
|
||||
@@ -54,11 +55,23 @@ export class BookingImportController {
|
||||
@CurrentUser() user: User,
|
||||
@Param('tripId') tripId: string,
|
||||
@UploadedFiles() files: Express.Multer.File[] | undefined,
|
||||
@Body('mode') rawMode?: string,
|
||||
) {
|
||||
const trip = this.requireTrip(tripId, user);
|
||||
this.requireEdit(trip, user);
|
||||
|
||||
if (!this.bookingImport.isAvailable()) {
|
||||
const modeResult = bookingImportModeSchema.safeParse(rawMode ?? 'no-ai');
|
||||
if (!modeResult.success) {
|
||||
throw new HttpException({ error: 'Invalid mode' }, 400);
|
||||
}
|
||||
const mode: BookingImportMode = modeResult.data;
|
||||
|
||||
// Forcing AI requires it to be configured; otherwise surface a clear 4xx.
|
||||
if (mode === 'force-ai' && !this.bookingImport.aiAvailable(user.id)) {
|
||||
throw new HttpException({ error: 'AI parsing is not configured' }, 409);
|
||||
}
|
||||
// For the kitinerary-only path, keep the existing 503 contract.
|
||||
if (mode === 'no-ai' && !this.bookingImport.isAvailable()) {
|
||||
throw new HttpException({ error: 'KItinerary extractor is not available on this server' }, 503);
|
||||
}
|
||||
|
||||
@@ -74,7 +87,7 @@ export class BookingImportController {
|
||||
}
|
||||
}
|
||||
|
||||
const result: BookingImportPreviewResponse = await this.bookingImport.preview(files);
|
||||
const result: BookingImportPreviewResponse = await this.bookingImport.preview(files, mode, user.id);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@@ -3,8 +3,10 @@ import { BookingImportController } from './booking-import.controller';
|
||||
import { BookingImportService } from './booking-import.service';
|
||||
import { KitineraryExtractorService } from './kitinerary-extractor.service';
|
||||
import { FeaturesController } from './features.controller';
|
||||
import { LlmParseModule } from '../llm-parse/llm-parse.module';
|
||||
|
||||
@Module({
|
||||
imports: [LlmParseModule],
|
||||
controllers: [BookingImportController, FeaturesController],
|
||||
providers: [BookingImportService, KitineraryExtractorService],
|
||||
})
|
||||
|
||||
@@ -8,9 +8,10 @@ import { searchNominatim } from '../../services/mapsService';
|
||||
import { db } from '../../db/database';
|
||||
import type { User } from '../../types';
|
||||
import { KitineraryExtractorService } from './kitinerary-extractor.service';
|
||||
import { LlmParseService } from '../llm-parse/llm-parse.service';
|
||||
import { mapReservations } from './kitinerary-mapper';
|
||||
import type { BookingImportPreviewItem, BookingImportPreviewResponse, BookingImportConfirmResponse, Reservation } from '@trek/shared';
|
||||
import type { ParsedBookingItem } from './kitinerary.types';
|
||||
import type { BookingImportPreviewItem, BookingImportPreviewResponse, BookingImportConfirmResponse, BookingImportMode, BookingImportFileReport, Reservation } from '@trek/shared';
|
||||
import type { ParsedBookingItem, KiReservation } from './kitinerary.types';
|
||||
|
||||
function resolveDayId(tripId: string, iso: string | null | undefined): number | null {
|
||||
if (!iso) return null;
|
||||
@@ -22,12 +23,20 @@ function resolveDayId(tripId: string, iso: string | null | undefined): number |
|
||||
|
||||
@Injectable()
|
||||
export class BookingImportService {
|
||||
constructor(private readonly extractor: KitineraryExtractorService) {}
|
||||
constructor(
|
||||
private readonly extractor: KitineraryExtractorService,
|
||||
private readonly llmParse: LlmParseService,
|
||||
) {}
|
||||
|
||||
isAvailable(): boolean {
|
||||
return this.extractor.isAvailable();
|
||||
}
|
||||
|
||||
/** True when the LLM fallback is enabled and configured for this user. */
|
||||
aiAvailable(userId: number): boolean {
|
||||
return this.llmParse.isAvailable(userId);
|
||||
}
|
||||
|
||||
verifyTripAccess(tripId: string, userId: number) {
|
||||
return verifyTripAccess(tripId, userId);
|
||||
}
|
||||
@@ -37,37 +46,65 @@ export class BookingImportService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse uploaded files through kitinerary-extractor and return a preview list.
|
||||
* Does NOT persist anything.
|
||||
* Parse uploaded files and return a preview list. Does NOT persist anything.
|
||||
* Runs kitinerary first; depending on `mode`, falls back to the LLM:
|
||||
* - no-ai: kitinerary only
|
||||
* - fallback-on-empty: LLM for files kitinerary returns nothing for
|
||||
* - force-ai: LLM on every file (kitinerary skipped)
|
||||
* LLM-derived items are flagged needs_review. Per-file AI usage is reported.
|
||||
*/
|
||||
async preview(files: Express.Multer.File[]): Promise<BookingImportPreviewResponse> {
|
||||
if (!this.extractor.isAvailable()) {
|
||||
async preview(
|
||||
files: Express.Multer.File[],
|
||||
mode: BookingImportMode,
|
||||
userId: number,
|
||||
): Promise<BookingImportPreviewResponse> {
|
||||
const kitineraryAvailable = this.extractor.isAvailable();
|
||||
const aiAvailable = this.llmParse.isAvailable(userId);
|
||||
if (!kitineraryAvailable && !aiAvailable) {
|
||||
throw new HttpException({ error: 'KItinerary extractor is not available on this server' }, 503);
|
||||
}
|
||||
|
||||
const allItems: ParsedBookingItem[] = [];
|
||||
const allWarnings: string[] = [];
|
||||
const fileReports: BookingImportFileReport[] = [];
|
||||
|
||||
for (const file of files) {
|
||||
let kiItems;
|
||||
try {
|
||||
kiItems = await this.extractor.extract(file.buffer, file.originalname);
|
||||
} catch (err) {
|
||||
allWarnings.push(`${file.originalname}: extraction failed — ${err instanceof Error ? err.message : String(err)}`);
|
||||
continue;
|
||||
let kiItems: KiReservation[] = [];
|
||||
let aiUsed = false;
|
||||
|
||||
// Stage 1: kitinerary (skipped entirely when forcing AI).
|
||||
if (mode !== 'force-ai' && kitineraryAvailable) {
|
||||
try {
|
||||
kiItems = await this.extractor.extract(file.buffer, file.originalname);
|
||||
} catch (err) {
|
||||
allWarnings.push(`${file.originalname}: extraction failed — ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Stage 1b: LLM fallback.
|
||||
const runLlm = aiAvailable && (mode === 'force-ai' || (mode === 'fallback-on-empty' && kiItems.length === 0));
|
||||
if (runLlm) {
|
||||
aiUsed = true;
|
||||
const llm = await this.llmParse.parse({ buffer: file.buffer, originalName: file.originalname }, userId);
|
||||
kiItems = llm.kiItems;
|
||||
allWarnings.push(...llm.warnings);
|
||||
}
|
||||
|
||||
fileReports.push({ fileName: file.originalname, aiAvailable, aiUsed });
|
||||
|
||||
if (kiItems.length === 0) {
|
||||
allWarnings.push(`${file.originalname}: no reservations found`);
|
||||
continue;
|
||||
}
|
||||
|
||||
const { items, warnings } = mapReservations(kiItems, file.originalname);
|
||||
// LLM extraction is less certain than kitinerary — always flag for review.
|
||||
if (aiUsed) for (const it of items) it.needs_review = true;
|
||||
allItems.push(...items);
|
||||
allWarnings.push(...warnings);
|
||||
}
|
||||
|
||||
return { items: allItems, warnings: allWarnings };
|
||||
return { items: allItems, warnings: allWarnings, files: fileReports };
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import { Controller, Get } from '@nestjs/common';
|
||||
import { KitineraryExtractorService } from './kitinerary-extractor.service';
|
||||
import { isAddonEnabled } from '../../services/adminService';
|
||||
import { ADDON_IDS } from '../../addons';
|
||||
|
||||
/** Exposes server feature flags consumed by the frontend to show/hide optional UI. */
|
||||
@Controller('api/health')
|
||||
@@ -10,6 +12,9 @@ export class FeaturesController {
|
||||
features() {
|
||||
return {
|
||||
bookingImport: this.extractor.isAvailable(),
|
||||
// Addon-level flag (per-user config availability is reported per-file in
|
||||
// the preview response). Drives whether the client shows AI affordances.
|
||||
aiParsing: isAddonEnabled(ADDON_IDS.LLM_PARSING),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
import type { LlmExtractionClient, LlmExtractionInput } from '../llm-provider.interface';
|
||||
|
||||
const TIMEOUT_MS = 120_000;
const MAX_TOKENS = 8192;
const ANTHROPIC_VERSION = '2023-06-01';
const TOOL_NAME = 'emit_reservations';
const USER_TEXT = 'Extract every travel reservation from the following document as schema.org JSON-LD.';

/**
 * Anthropic Messages API client. Structured output via forced tool-use: a single
 * `emit_reservations` tool whose `input_schema` is the reservations schema, with
 * `tool_choice` forcing it. When `input.file` is set it is sent as a base64
 * `document` content block ahead of the text part.
 * Raw fetch (no SDK) to match the codebase's HTTP style.
 */
export class AnthropicClient implements LlmExtractionClient {
  /**
   * Send the document to the Messages API and return the `reservations` array
   * from the forced tool call.
   *
   * @param input Model, prompt, schema, credentials and the file/text payload.
   * @returns The raw reservation nodes, or `[]` when the response carries no
   *   `emit_reservations` tool call with an array `reservations` field.
   * @throws Error on a non-2xx response, on a `refusal` stop reason, or when the
   *   whole exchange (headers AND body) exceeds `TIMEOUT_MS`.
   */
  async extract(input: LlmExtractionInput): Promise<Record<string, unknown>[]> {
    const base = (input.baseUrl ?? 'https://api.anthropic.com').replace(/\/+$/, '');
    const url = `${base}/v1/messages`;

    const content: unknown[] = [];
    if (input.file) {
      content.push({
        type: 'document',
        source: { type: 'base64', media_type: input.file.mimeType, data: input.file.data.toString('base64') },
      });
    }
    content.push({
      type: 'text',
      text: input.text ? `${USER_TEXT}\n\n${input.text}` : USER_TEXT,
    });

    const body = {
      model: input.model,
      max_tokens: MAX_TOKENS,
      system: input.prompt,
      tools: [
        {
          name: TOOL_NAME,
          description: 'Return the travel reservations extracted from the document.',
          input_schema: input.jsonSchema,
        },
      ],
      tool_choice: { type: 'tool', name: TOOL_NAME },
      messages: [{ role: 'user', content }],
    };

    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
    try {
      const res = await fetch(url, {
        method: 'POST',
        signal: controller.signal,
        headers: {
          'content-type': 'application/json',
          'x-api-key': input.apiKey ?? '',
          'anthropic-version': ANTHROPIC_VERSION,
        },
        body: JSON.stringify(body),
      });

      if (!res.ok) {
        const detail = await res.text().catch(() => '');
        throw new Error(`Anthropic request failed (${res.status}): ${detail.slice(0, 300)}`);
      }

      // The timer stays armed while the body is read: fetch() resolves once the
      // headers arrive, so clearing it earlier would leave the body read unbounded.
      const data = (await res.json()) as {
        stop_reason?: string;
        content?: { type: string; name?: string; input?: { reservations?: unknown } }[];
      };

      if (data.stop_reason === 'refusal') {
        throw new Error('Anthropic declined to process this document');
      }

      const toolUse = data.content?.find(b => b.type === 'tool_use' && b.name === TOOL_NAME);
      const reservations = toolUse?.input?.reservations;
      return Array.isArray(reservations) ? (reservations as Record<string, unknown>[]) : [];
    } catch (err) {
      // Only our timer aborts this controller, so an aborted signal means a timeout.
      if (controller.signal.aborted) {
        throw new Error(`Anthropic request timed out after ${TIMEOUT_MS / 1000}s`);
      }
      throw err;
    } finally {
      clearTimeout(timer);
    }
  }
}
|
||||
@@ -0,0 +1,95 @@
|
||||
import type { LlmExtractionClient, LlmExtractionInput } from '../llm-provider.interface';
|
||||
|
||||
const TIMEOUT_MS = 60_000;
const MAX_TOKENS = 4096;
const USER_TEXT = 'Extract every travel reservation from the following document as schema.org JSON-LD.';

/**
 * OpenAI-compatible chat-completions client. Covers both the "openai" cloud
 * provider and the "local" provider (Ollama / vLLM / llama.cpp / LM Studio),
 * which all expose `POST {baseUrl}/chat/completions`. Text goes as a text part;
 * only image files are attached natively (as an `image_url` data URL) — any
 * other file type on `input.file` is ignored by this client. Uses the global
 * fetch (no SDK) to match the codebase's HTTP style.
 */
export class OpenAiCompatibleClient implements LlmExtractionClient {
  /**
   * Call the chat-completions endpoint with a JSON-schema response format and
   * return the parsed reservation nodes.
   *
   * @param input Model, prompt, schema, credentials and the file/text payload.
   * @returns Whatever `parseReservations` extracts from the first choice's
   *   message content (`[]` when it is missing or unparseable).
   * @throws Error on a non-2xx response, or when the whole exchange (headers AND
   *   body) exceeds `TIMEOUT_MS`.
   */
  async extract(input: LlmExtractionInput): Promise<Record<string, unknown>[]> {
    const base = (input.baseUrl ?? 'https://api.openai.com/v1').replace(/\/+$/, '');
    const url = `${base}/chat/completions`;

    const userContent: unknown[] = [
      { type: 'text', text: input.text ? `${USER_TEXT}\n\n${input.text}` : USER_TEXT },
    ];
    // Only genuine images go natively (as image_url) — OpenAI-compatible servers
    // (notably Ollama) reject `file`/PDF content parts. PDFs reach this client as
    // pre-extracted text (see llm-parse.service.ts), never as bytes.
    if (input.file && input.file.mimeType.startsWith('image/')) {
      const b64 = input.file.data.toString('base64');
      userContent.push({
        type: 'image_url',
        image_url: { url: `data:${input.file.mimeType};base64,${b64}` },
      });
    }

    const body = {
      model: input.model,
      max_tokens: MAX_TOKENS,
      // Extraction is a deterministic task — Ollama defaults to 0.7, which makes
      // small models (NuExtract) drop fields or return empty. Pin to 0.
      temperature: 0,
      messages: [
        { role: 'system', content: input.prompt },
        { role: 'user', content: userContent },
      ],
      response_format: {
        type: 'json_schema',
        json_schema: { name: 'reservations', schema: input.jsonSchema, strict: false },
      },
    };

    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
    try {
      const res = await fetch(url, {
        method: 'POST',
        signal: controller.signal,
        headers: {
          'content-type': 'application/json',
          ...(input.apiKey ? { authorization: `Bearer ${input.apiKey}` } : {}),
        },
        body: JSON.stringify(body),
      });

      if (!res.ok) {
        const detail = await res.text().catch(() => '');
        throw new Error(`LLM request failed (${res.status}): ${detail.slice(0, 300)}`);
      }

      // The timer stays armed while the body is read: fetch() resolves once the
      // headers arrive, so clearing it earlier would leave the body read unbounded.
      const data = (await res.json()) as {
        choices?: { message?: { content?: string } }[];
      };
      const content = data.choices?.[0]?.message?.content;
      return parseReservations(content);
    } catch (err) {
      // Only our timer aborts this controller, so an aborted signal means a timeout.
      if (controller.signal.aborted) {
        throw new Error(`LLM request timed out after ${TIMEOUT_MS / 1000}s`);
      }
      throw err;
    } finally {
      clearTimeout(timer);
    }
  }
}
|
||||
|
||||
/**
 * Tolerant parse of a model's message content into reservation nodes.
 *
 * Candidates are tried in order and the first one that is valid JSON decides the
 * result:
 *   1. the content with a leading/trailing code fence stripped,
 *   2. the body of the first fenced block found anywhere in the content,
 *   3. the span from the first `{`/`[` to the last matching `}`/`]`.
 * Steps 2 and 3 cover models that wrap the JSON in prose ("Here you go: ...").
 *
 * @param content Raw message content; may be missing.
 * @returns The parsed top-level array, or the `reservations` array of a parsed
 *   object; `[]` when nothing parses or the parsed value has neither shape.
 */
function parseReservations(content: string | undefined | null): Record<string, unknown>[] {
  if (!content) return [];
  const text = content.trim();

  const candidates: string[] = [text.replace(/^```(?:json)?/i, '').replace(/```$/, '').trim()];

  const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(text);
  if (fenced?.[1]) candidates.push(fenced[1].trim());

  const start = text.search(/[{\[]/);
  if (start !== -1) {
    const closer = text[start] === '{' ? '}' : ']';
    const end = text.lastIndexOf(closer);
    if (end > start) candidates.push(text.slice(start, end + 1));
  }

  for (const candidate of candidates) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      continue;
    }
    if (Array.isArray(parsed)) return parsed as Record<string, unknown>[];
    if (parsed && typeof parsed === 'object' && Array.isArray((parsed as { reservations?: unknown }).reservations)) {
      return (parsed as { reservations: Record<string, unknown>[] }).reservations;
    }
    return [];
  }
  return [];
}
|
||||
@@ -0,0 +1,24 @@
|
||||
import type { LlmExtractionClient } from './llm-provider.interface';
|
||||
import type { ResolvedLlmConfig } from '../../services/llmConfig';
|
||||
import { OpenAiCompatibleClient } from './clients/openai-compatible.client';
|
||||
import { AnthropicClient } from './clients/anthropic.client';
|
||||
|
||||
/**
 * Select the extraction client that matches a resolved config's provider.
 * - 'anthropic' gets the Anthropic Messages API client.
 * - Everything else ('openai', 'local', and any other value) gets the
 *   OpenAI-compatible client, which targets either the cloud or a local base URL.
 */
export function createLlmClient(config: ResolvedLlmConfig): LlmExtractionClient {
  if (config.provider === 'anthropic') {
    return new AnthropicClient();
  }
  // TODO(nuextract): add a NuExtract template adapter here (local vision model
  // with its own template-fill API) once the OpenAI-compatible path proves
  // insufficient for small local models — see the design seam in the plan.
  return new OpenAiCompatibleClient();
}
|
||||
@@ -0,0 +1,55 @@
|
||||
import { db } from '../../db/database';
|
||||
import { ADDON_IDS } from '../../addons';
|
||||
import { isAddonEnabled } from '../../services/adminService';
|
||||
import { getUserSettings, getDecryptedUserSetting } from '../../services/settingsService';
|
||||
import { decryptLlmApiKey, LLM_PROVIDERS, type LlmProvider, type ResolvedLlmConfig } from '../../services/llmConfig';
|
||||
|
||||
/** Narrow an arbitrary value to a known provider id, or `null` when it is not one. */
function asProvider(v: unknown): LlmProvider | null {
  if (typeof v !== 'string') return null;
  const knownProviders = LLM_PROVIDERS as string[];
  return knownProviders.includes(v) ? (v as LlmProvider) : null;
}
|
||||
|
||||
/**
 * Read the admin-level (instance-wide) LLM config from the addon row's `config`
 * JSON column.
 *
 * @returns The resolved config, or `null` when the row/column is missing, the
 *   JSON is malformed or is not an object, or provider/model are absent or invalid.
 */
function readInstanceConfig(): ResolvedLlmConfig | null {
  const row = db.prepare('SELECT config FROM addons WHERE id = ?').get(ADDON_IDS.LLM_PARSING) as { config?: string } | undefined;
  if (!row?.config) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(row.config);
  } catch {
    return null;
  }
  // A stored `null` (or any non-object) is valid JSON; reading `.provider` off
  // `null` would throw, so treat every non-object payload as "not configured".
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) return null;
  const cfg = parsed as Record<string, unknown>;

  const provider = asProvider(cfg.provider);
  const model = typeof cfg.model === 'string' ? cfg.model.trim() : '';
  if (!provider || !model) return null;
  return {
    provider,
    model,
    baseUrl: typeof cfg.baseUrl === 'string' && cfg.baseUrl.trim() ? cfg.baseUrl.trim() : undefined,
    apiKey: decryptLlmApiKey(cfg.apiKey),
    multimodal: cfg.multimodal === true,
  };
}
|
||||
|
||||
/**
 * Build an LLM config from one user's personal settings.
 *
 * @returns `null` unless the settings hold both a known provider and a non-blank
 *   model; the API key is only decrypted once those two checks have passed.
 */
function readUserConfig(userId: number): ResolvedLlmConfig | null {
  const userSettings = getUserSettings(userId);

  const provider = asProvider(userSettings.llm_provider);
  const rawModel = userSettings.llm_model;
  const model = typeof rawModel === 'string' ? rawModel.trim() : '';
  if (!provider || !model) {
    return null;
  }

  const apiKey = getDecryptedUserSetting(userId, 'llm_api_key') ?? undefined;

  const rawBaseUrl = userSettings.llm_base_url;
  let baseUrl: string | undefined;
  if (typeof rawBaseUrl === 'string' && rawBaseUrl.trim()) {
    baseUrl = rawBaseUrl.trim();
  }

  const multimodal = userSettings.llm_multimodal === true;
  return { provider, model, baseUrl, apiKey, multimodal };
}
|
||||
|
||||
/**
 * Work out which LLM config applies to a user, behind the addon switch.
 * Precedence: a disabled addon yields null; otherwise the admin instance config
 * is used when present, falling back to the user's own config (which may itself
 * be null).
 */
export function resolveLlmConfig(userId: number): ResolvedLlmConfig | null {
  const addonOn = isAddonEnabled(ADDON_IDS.LLM_PARSING);
  if (!addonOn) {
    return null;
  }
  const instanceConfig = readInstanceConfig();
  return instanceConfig ?? readUserConfig(userId);
}
|
||||
@@ -0,0 +1,45 @@
|
||||
import { Controller, Get, Post, Query, Body, Res, UseGuards } from '@nestjs/common';
|
||||
import type { Response } from 'express';
|
||||
import { JwtAuthGuard } from '../auth/jwt-auth.guard';
|
||||
import { AdminGuard } from '../auth/admin.guard';
|
||||
import { LlmLocalService } from './llm-local.service';
|
||||
|
||||
/**
 * Admin endpoints for a local LLM server (Ollama): enumerate the models it
 * already has and trigger a pull of a new one. Both routes sit behind the JWT
 * and admin guards.
 */
@Controller('api/admin/llm/local')
@UseGuards(JwtAuthGuard, AdminGuard)
export class LlmLocalController {
  constructor(private readonly local: LlmLocalService) {}

  /** List the models installed on the server at `baseUrl` (service default when omitted). */
  @Get('models')
  models(@Query('baseUrl') baseUrl?: string) {
    return this.local.listModels(baseUrl);
  }

  /**
   * Relay a model pull as NDJSON. Each chunk read from the upstream body is
   * written to the HTTP response unchanged; the response is always ended, whether
   * the upstream finishes or fails part-way. Streams by hand through @Res().
   */
  @Post('pull')
  async pull(@Body() body: { baseUrl?: string; model?: string }, @Res() res: Response): Promise<void> {
    const requestedModel = body?.model ?? '';
    const upstream = await this.local.pull(body?.baseUrl, requestedModel);

    res.status(200);
    res.setHeader('Content-Type', 'application/x-ndjson');
    res.setHeader('Cache-Control', 'no-cache');

    const reader = upstream.getReader();
    try {
      let chunk = await reader.read();
      while (!chunk.done) {
        res.write(Buffer.from(chunk.value));
        chunk = await reader.read();
      }
    } catch {
      // Upstream dropped mid-pull — close the response; the client surfaces it.
    } finally {
      res.end();
    }
  }
}
|
||||
@@ -0,0 +1,63 @@
|
||||
import { Injectable, HttpException } from '@nestjs/common';
|
||||
|
||||
/**
 * Admin helpers for managing a local OpenAI-compatible LLM server (Ollama).
 * Talks to Ollama's *management* API (`/api/tags`, `/api/pull`), which lives at
 * the server root — not the `/v1` OpenAI-compatible path the extraction client
 * uses. Admin-only (guarded at the controller); the base URL is admin-supplied
 * and typically points at a localhost Ollama, so SSRF guarding is intentionally
 * not applied (it would block localhost) — we only validate the protocol.
 */
@Injectable()
export class LlmLocalService {
  /**
   * Derive the Ollama root from a configured base URL.
   *
   * The root is rebuilt from the parsed URL's origin and path, so trailing
   * slashes and a trailing `/v1` are stripped from the path itself and any query
   * string or fragment is dropped (appending `/api/...` after them would produce
   * a malformed request URL).
   *
   * @param baseUrl Admin-supplied base URL; defaults to `http://localhost:11434`.
   * @returns The server root without a trailing slash.
   * @throws HttpException 400 when the URL is unparseable or not http(s).
   */
  ollamaRoot(baseUrl: string | undefined): string {
    const raw = (baseUrl ?? 'http://localhost:11434').trim();
    let url: URL;
    try {
      url = new URL(raw);
    } catch {
      throw new HttpException({ error: 'Invalid base URL' }, 400);
    }
    if (url.protocol !== 'http:' && url.protocol !== 'https:') {
      throw new HttpException({ error: 'Base URL must be http(s)' }, 400);
    }
    const path = url.pathname.replace(/\/+$/, '').replace(/\/v1$/, '').replace(/\/+$/, '');
    return `${url.origin}${path}`;
  }

  /**
   * List models already pulled on the local server.
   *
   * @returns Entries with a non-empty name; a missing size is reported as 0.
   * @throws HttpException 400 for a bad base URL; 502 when the server is
   *   unreachable, answers non-2xx, or returns a body that is not JSON.
   */
  async listModels(baseUrl: string | undefined): Promise<{ models: { name: string; size: number }[] }> {
    const root = this.ollamaRoot(baseUrl);
    let res: Response;
    try {
      res = await fetch(`${root}/api/tags`, { signal: AbortSignal.timeout(10_000) });
    } catch {
      throw new HttpException({ error: `Could not reach local LLM server at ${root}` }, 502);
    }
    if (!res.ok) throw new HttpException({ error: `Local LLM server error (${res.status})` }, 502);

    let data: unknown;
    try {
      data = await res.json();
    } catch {
      // Something other than Ollama answered (e.g. an HTML error page).
      throw new HttpException({ error: 'Local LLM server returned an invalid response' }, 502);
    }
    const rawModels = (data as { models?: unknown } | null)?.models;
    const entries: unknown[] = Array.isArray(rawModels) ? rawModels : [];
    const models = entries
      .map(entry => {
        const m = (entry ?? {}) as { name?: unknown; size?: unknown };
        return {
          name: typeof m.name === 'string' ? m.name : '',
          size: typeof m.size === 'number' ? m.size : 0,
        };
      })
      .filter(m => m.name);
    return { models };
  }

  /**
   * Start a streamed pull. Returns the upstream NDJSON body so the controller can
   * pipe Ollama's progress lines straight to the client. No timeout is set on
   * this request.
   *
   * @throws HttpException 400 when `model` is blank or the base URL is bad; 502
   *   when the server is unreachable, answers non-2xx, or sends no body.
   */
  async pull(baseUrl: string | undefined, model: string): Promise<ReadableStream<Uint8Array>> {
    if (!model?.trim()) throw new HttpException({ error: 'model is required' }, 400);
    const root = this.ollamaRoot(baseUrl);
    let res: Response;
    try {
      res = await fetch(`${root}/api/pull`, {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify({ model: model.trim(), stream: true }),
      });
    } catch {
      throw new HttpException({ error: `Could not reach local LLM server at ${root}` }, 502);
    }
    if (!res.ok || !res.body) throw new HttpException({ error: `Pull failed (${res.status})` }, 502);
    return res.body;
  }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { LlmParseService } from './llm-parse.service';
|
||||
import { LlmLocalService } from './llm-local.service';
|
||||
import { LlmLocalController } from './llm-local.controller';
|
||||
|
||||
/**
 * Nest module for the LLM booking-import fallback. Registers the local-LLM admin
 * controller and both services, and exports only the parse service for use by
 * importing modules (BookingImportModule imports this one).
 */
@Module({
  controllers: [
    LlmLocalController,
  ],
  providers: [
    LlmParseService,
    LlmLocalService,
  ],
  exports: [
    LlmParseService,
  ],
})
export class LlmParseModule {}
|
||||
@@ -0,0 +1,129 @@
|
||||
import type { KiReservation } from '../booking-import/kitinerary.types';
|
||||
import { createLlmClient } from './llm-client.factory';
|
||||
import { resolveLlmConfig } from './llm-config.resolver';
|
||||
import { buildSystemPrompt, KI_RESERVATION_JSON_SCHEMA } from './llm-prompt';
|
||||
import type { LlmExtractionInput } from './llm-provider.interface';
|
||||
import { isPdf, extractText } from './text-extract';
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { kiReservationSchema } from '@trek/shared';
|
||||
|
||||
const MIME_BY_EXT: Record<string, string> = {
  '.pdf': 'application/pdf',
};

/** Outcome of one LLM parse: validated reservation nodes plus user-facing warnings. */
export interface LlmParseResult {
  kiItems: KiReservation[];
  warnings: string[];
}

/**
 * Orchestrates the LLM fallback: resolve config → pick client → build input
 * (native PDF bytes for Anthropic, extracted text otherwise) → call provider →
 * validate the response → return schema.org `KiReservation[]` for the shared
 * mapper. File-read and provider failures are caught here and degrade to `[]`
 * plus a warning rather than propagating.
 */
@Injectable()
export class LlmParseService {
  /** True when the addon is enabled AND a usable config resolves for this user. */
  isAvailable(userId: number): boolean {
    return resolveLlmConfig(userId) !== null;
  }

  /**
   * Run one uploaded file through the configured LLM.
   *
   * @param file Uploaded bytes and the original file name (used for PDF detection
   *   and for prefixing warnings).
   * @param userId User whose LLM config should be resolved.
   * @returns Validated reservation nodes and any warnings; never the raw document.
   */
  async parse(file: { buffer: Buffer; originalName: string }, userId: number): Promise<LlmParseResult> {
    const config = resolveLlmConfig(userId);
    if (!config) return { kiItems: [], warnings: ['AI parsing is not configured'] };

    const warnings: string[] = [];
    const input: LlmExtractionInput = {
      prompt: buildSystemPrompt(),
      jsonSchema: KI_RESERVATION_JSON_SCHEMA,
      model: config.model,
      baseUrl: config.baseUrl,
      apiKey: config.apiKey,
    };

    // Native PDF only for Anthropic (its document block reads text AND scans).
    // OpenAI-compatible servers (incl. Ollama/NuExtract) can't ingest PDFs/`file`
    // parts, so every other provider gets extracted text.
    try {
      if (config.provider === 'anthropic' && isPdf(file.originalName)) {
        input.file = { mimeType: MIME_BY_EXT['.pdf'], data: file.buffer };
        console.debug(
          `[DEBUG] Extracted (native PDF, ${file.buffer.length} bytes) sent to ${config.provider}: ${file.originalName}`,
        );
      } else {
        input.text = await extractText(file.buffer, file.originalName);
        // Log the size only: booking documents carry personal data (names,
        // confirmation codes) that must not end up in server logs.
        console.debug(`[DEBUG] Extracted text from ${file.originalName} (${input.text.length} chars)`);
        if (!input.text.trim()) {
          return {
            kiItems: [],
            warnings: [`${file.originalName}: no readable text found (a scanned PDF needs a cloud/vision provider)`],
          };
        }
      }
    } catch (err) {
      return {
        kiItems: [],
        warnings: [`${file.originalName}: could not read file — ${err instanceof Error ? err.message : String(err)}`],
      };
    }

    let raw: Record<string, unknown>[];
    try {
      raw = await createLlmClient(config).extract(input);
      // Count only — the raw response repeats the document's personal data.
      console.debug(`[DEBUG] LLM returned ${raw.length} candidate reservation(s) for ${file.originalName}`);
    } catch (err) {
      return {
        kiItems: [],
        warnings: [`${file.originalName}: AI parsing failed — ${err instanceof Error ? err.message : String(err)}`],
      };
    }

    const kiItems: KiReservation[] = [];
    for (const node of raw) {
      const result = kiReservationSchema.safeParse(node);
      if (result.success) kiItems.push(normalizeNode(result.data) as unknown as KiReservation);
      else warnings.push(`${file.originalName}: skipped an unrecognized AI result`);
    }

    return { kiItems, warnings };
  }
}
|
||||
|
||||
/**
 * Root-level keys in the schema.org reservation shape; everything else is
 * treated as type-specific and belongs under `reservationFor`. Includes both
 * ends of a rental (`pickupLocation` AND `dropoffLocation`), which schema.org
 * defines on the reservation itself.
 */
const ROOT_KEYS = new Set([
  '@type',
  'reservationNumber',
  'checkinTime',
  'checkoutTime',
  'pickupTime',
  'dropoffTime',
  'startTime',
  'endTime',
  'pickupLocation',
  'dropoffLocation',
  'reservationFor',
]);

/**
 * Small models often flatten the type-specific fields (flightNumber, airline,
 * departureAirport, …) onto the reservation root instead of nesting them under
 * `reservationFor`, which is where the kitinerary mapper reads them. When
 * `reservationFor` is missing/empty, fold the non-root keys into it so the
 * existing mappers work unchanged.
 *
 * @param node A reservation node as returned by the model.
 * @returns The same object when `reservationFor` is already a non-empty object
 *   or there is nothing to fold; otherwise a new object holding the root keys
 *   plus a `reservationFor` built from every other key.
 */
function normalizeNode(node: Record<string, unknown>): Record<string, unknown> {
  const rf = node.reservationFor;
  if (rf && typeof rf === 'object' && Object.keys(rf as object).length > 0) return node;

  const out: Record<string, unknown> = {};
  const reservationFor: Record<string, unknown> = {};
  for (const [k, v] of Object.entries(node)) {
    if (ROOT_KEYS.has(k)) out[k] = v;
    else reservationFor[k] = v;
  }
  // Nothing to fold (no flattened type fields) — leave the node as-is.
  if (Object.keys(reservationFor).length === 0) return node;
  // Assigned last so it replaces any empty/invalid `reservationFor` copied above.
  out.reservationFor = reservationFor;
  return out;
}
|
||||
@@ -0,0 +1,35 @@
|
||||
import { KI_RESERVATION_JSON_SCHEMA, KI_RESERVATION_TYPES } from '@trek/shared';
|
||||
|
||||
export { KI_RESERVATION_JSON_SCHEMA };
|
||||
|
||||
/**
 * Builds the system prompt that asks the model for schema.org reservation
 * JSON-LD wrapped as `{ "reservations": [...] }`.
 *
 * The function performs no I/O: it only joins fixed instruction lines with the
 * list of allowed `@type` values taken from KI_RESERVATION_TYPES.
 *
 * @returns The newline-joined system prompt.
 */
export function buildSystemPrompt(): string {
  // One bullet per allowed reservation type.
  const allowedTypes = KI_RESERVATION_TYPES.map((t) => ` - ${t}`).join('\n');

  const outputContract = [
    'You extract travel reservations from a document (a booking confirmation, ticket, or itinerary).',
    'Return ONLY a JSON object of the form { "reservations": [ ... ] } — no prose, no markdown.',
    'Each reservation is a schema.org JSON-LD object whose "@type" is one of:',
    allowedTypes,
    'Put the booking/confirmation code in "reservationNumber" on each reservation.',
    'All dates/times are plain ISO 8601 local strings, e.g. "2026-06-11T10:00:00" (no timezone wrapper objects).',
  ];

  // Where each reservation type keeps its type-specific fields.
  const fieldLayout = [
    'IMPORTANT: nest the type-specific fields INSIDE a "reservationFor" object — do NOT place them at the top level of the reservation.',
    'Populate "reservationFor" with the type-specific fields:',
    ' FlightReservation: { flightNumber, airline:{name,iataCode}, departureAirport:{iataCode,name,geo:{latitude,longitude}}, arrivalAirport:{...}, departureTime, arrivalTime }',
    ' TrainReservation: { trainNumber, trainName, departureStation:{name,geo}, arrivalStation:{name,geo}, departureTime, arrivalTime }',
    ' BusReservation: { busNumber, busName, departureBusStop:{name,geo}, arrivalBusStop:{name,geo}, departureTime, arrivalTime }',
    ' BoatReservation: { name, departureBoatTerminal:{name,geo}, arrivalBoatTerminal:{name,geo}, departureTime, arrivalTime }',
    ' LodgingReservation: { name, address, geo:{latitude,longitude}, telephone, url } — put check-in/out in root "checkinTime"/"checkoutTime"',
    ' FoodEstablishmentReservation: { name, address, geo, telephone, url } — put booking time in root "startTime"/"endTime"',
    ' RentalCarReservation: { name, model, make, rentalCompany:{name} } — put pickup/dropoff in root "pickupTime"/"dropoffTime" and "pickupLocation":{name,address,geo}',
    ' EventReservation / TouristAttractionVisit: { name, startDate, endDate, location:{name,address,geo} }',
  ];

  // Multi-segment handling and the empty-result fallback.
  const extractionRules = [
    'Extract EVERY flight/segment in the document, including return legs — a round trip has TWO OR MORE flights, and each row of a flight table is a separate reservation. Do NOT stop after the first.',
    "Each flight shares the booking's reservationNumber. Use the date shown for that specific flight as its departureTime; if a flight lists only one date (no separate arrival time), leave arrivalTime null — never reuse another flight's date.",
    'If the document contains no recognizable reservation, return { "reservations": [] }.',
  ];

  return [...outputContract, ...fieldLayout, ...extractionRules].join('\n');
}
|
||||
|
||||
/** Short user-turn instruction that accompanies the document content. */
|
||||
export const USER_INSTRUCTION = 'Extract every travel reservation from the following document as schema.org JSON-LD.';
|
||||
@@ -0,0 +1,30 @@
|
||||
/** One binary attachment (for example a PDF) handed to a multimodal provider as-is. */
export interface LlmExtractionFile {
  /** MIME type of `data`, e.g. `application/pdf`. */
  mimeType: string;
  /** Raw file bytes. */
  data: Buffer;
}

/** The full request a provider client receives for a single document. */
export interface LlmExtractionInput {
  /** System instructions enumerating the schema.org shape (see llm-prompt.ts). */
  prompt: string;
  /** JSON Schema describing `{ reservations: KiReservation[] }`. */
  jsonSchema: object;
  /** Provider-specific model identifier. */
  model: string;
  /** Optional endpoint override for the provider. */
  baseUrl?: string;
  /** Optional credential for the provider. */
  apiKey?: string;
  /** Pre-extracted text (text-like files, or text-only-model mode). */
  text?: string;
  /** Native binary (PDF) for multimodal providers. */
  file?: LlmExtractionFile;
}

/**
 * Contract every provider client implements: one document in, raw schema.org
 * reservation objects out.
 *
 * `extract` resolves with the parsed `reservations` array. The documented
 * contract is best-effort: `[]` for a malformed or empty response rather than
 * throwing for content reasons. Validation and mapping through the shared
 * kitinerary mapper are left to the caller.
 */
export interface LlmExtractionClient {
  extract(input: LlmExtractionInput): Promise<Record<string, unknown>[]>;
}
|
||||
@@ -0,0 +1,71 @@
|
||||
import { extname } from 'node:path';
|
||||
import { PDFParse } from 'pdf-parse';
|
||||
|
||||
/** Extensions (lower-case, with the dot) of files whose bytes can be decoded directly as text. */
const TEXT_LIKE = new Set(['.txt', '.html', '.htm', '.eml']);

/**
 * Reports whether a file name carries one of the TEXT_LIKE extensions.
 * The extension comparison is case-insensitive.
 */
export function isTextLike(fileName: string): boolean {
  const extension = extname(fileName).toLowerCase();
  return TEXT_LIKE.has(extension);
}
|
||||
|
||||
/** Reports whether a file name ends in the `.pdf` extension (case-insensitive). */
export function isPdf(fileName: string): boolean {
  const extension = extname(fileName).toLowerCase();
  return extension === '.pdf';
}
|
||||
|
||||
/**
 * Strip HTML/XML tags and collapse whitespace for a cleaner LLM prompt.
 *
 * Steps, in order:
 *  1. normalize CRLF / lone CR to LF (mail files commonly use CRLF, which
 *     would otherwise defeat the blank-line collapse below)
 *  2. drop whole `<script>` and `<style>` elements, then every remaining tag
 *  3. turn non-breaking spaces (the named/numeric entities and the raw U+00A0
 *     character) into plain spaces
 *  4. collapse runs of spaces/tabs, trim spaces around newlines, and cap
 *     consecutive newlines at two
 *
 * @param s Raw markup or text.
 * @returns The cleaned, trimmed text.
 */
function stripMarkup(s: string): string {
  return s
    .replace(/\r\n?/g, '\n')
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<[^>]+>/g, ' ')
    // Written as an alternation so the pattern survives entity-decoding of this source.
    .replace(/&(?:nbsp|#160|#xa0);|\u00a0/gi, ' ')
    .replace(/[ \t]+/g, ' ')
    // Tag removal leaves lines holding a single space; trim them so the
    // blank-line collapse below can actually match.
    .replace(/ *\n */g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
|
||||
|
||||
/**
 * Reads the embedded text layer of a PDF and runs it through cleanPdfText.
 * A scanned/image-only PDF has no text layer, so the result is empty.
 *
 * @param buffer The PDF bytes.
 * @returns The cleaned text.
 */
async function extractPdfText(buffer: Buffer): Promise<string> {
  const bytes = new Uint8Array(buffer);
  const parser = new PDFParse({ data: bytes });
  try {
    // A space (rather than a tab) between items on the same line reads more
    // naturally for the LLM.
    const { text } = await parser.getText({ cellSeparator: ' ' });
    return cleanPdfText(text ?? '');
  } finally {
    // Always release the parser, including when getText rejects.
    await parser.destroy?.();
  }
}
|
||||
|
||||
/**
 * Tidies pdf-parse output before it is sent to the LLM.
 *
 * Applied in order:
 *  - remove `-- N of M --` page-marker lines
 *  - squeeze runs of spaces/tabs into one space
 *  - re-join letter-spaced UPPERCASE runs ("A M S T E R D A M" → "AMSTERDAM"),
 *    a common PDF kerning artifact that otherwise hides booking fields
 *  - drop spaces around newlines and cap consecutive newlines at two
 *
 * @param text Raw text as returned by the PDF parser.
 * @returns The cleaned, trimmed text.
 */
function cleanPdfText(text: string): string {
  const withoutPageMarkers = text.replace(/^\s*-+\s*\d+\s+of\s+\d+\s*-+\s*$/gim, '');
  const singleSpaced = withoutPageMarkers.replace(/[ \t]+/g, ' ');

  // Three or more single capitals separated by single spaces become one word.
  const rejoined = singleSpaced.replace(/\b(?:[A-Z] ){2,}[A-Z]\b/g, (run) => run.replace(/ /g, ''));

  const trimmedLines = rejoined.replace(/ *\n */g, '\n');
  const compactedBlankLines = trimmedLines.replace(/\n{3,}/g, '\n\n');
  return compactedBlankLines.trim();
}
|
||||
|
||||
/**
 * Turns a booking file into plain text for the OpenAI-compatible/local LLM
 * path (Ollama can't ingest PDFs or `file` parts, so everything becomes text).
 *
 * Dispatch by file extension:
 *  - pdf           → embedded text layer via extractPdfText
 *  - html/htm/eml  → UTF-8 decoded, then passed through stripMarkup
 *  - anything else → UTF-8 decoded and trimmed
 *
 * A scanned/image-only PDF yields empty text — that case needs a vision
 * provider (Anthropic reads PDFs natively).
 *
 * @param buffer   The file bytes.
 * @param fileName The file name; only its extension is inspected.
 * @returns The extracted text.
 */
export async function extractText(buffer: Buffer, fileName: string): Promise<string> {
  if (isPdf(fileName)) {
    return extractPdfText(buffer);
  }

  const decoded = buffer.toString('utf8');
  const extension = extname(fileName).toLowerCase();
  const markupExtensions = ['.html', '.htm', '.eml'];

  return markupExtensions.includes(extension) ? stripMarkup(decoded) : decoded.trim();
}
|
||||
@@ -11,6 +11,8 @@ import { revokeUserSessions, revokeUserSessionsForClient } from '../mcp';
|
||||
import { deleteUserCompletely } from './userCleanupService';
|
||||
import { validatePassword } from './passwordPolicy';
|
||||
import { getPhotoProviderConfig } from './memories/helpersService';
|
||||
import { ADDON_IDS } from '../addons';
|
||||
import { prepareLlmAddonConfigForWrite, maskLlmAddonConfig } from './llmConfig';
|
||||
import { send as sendNotification } from './notificationService';
|
||||
import { resolveAuthToggles } from './authService';
|
||||
|
||||
@@ -670,7 +672,13 @@ export function listAddons() {
|
||||
}
|
||||
|
||||
return [
|
||||
...addons.map(a => ({ ...a, enabled: !!a.enabled, config: JSON.parse(a.config || '{}') })),
|
||||
...addons.map(a => ({
|
||||
...a,
|
||||
enabled: !!a.enabled,
|
||||
config: a.id === ADDON_IDS.LLM_PARSING
|
||||
? maskLlmAddonConfig(JSON.parse(a.config || '{}'))
|
||||
: JSON.parse(a.config || '{}'),
|
||||
})),
|
||||
...providers.map(p => ({
|
||||
id: p.id,
|
||||
name: p.name,
|
||||
@@ -702,7 +710,14 @@ export function updateAddon(id: string, data: { enabled?: boolean; config?: Reco
|
||||
|
||||
if (addon) {
|
||||
if (data.enabled !== undefined) db.prepare('UPDATE addons SET enabled = ? WHERE id = ?').run(data.enabled ? 1 : 0, id);
|
||||
if (data.config !== undefined) db.prepare('UPDATE addons SET config = ? WHERE id = ?').run(JSON.stringify(data.config), id);
|
||||
if (data.config !== undefined) {
|
||||
// The AI-parsing addon holds an API key — encrypt it at rest and preserve
|
||||
// the stored key when the client echoes the mask sentinel (see llmConfig.ts).
|
||||
const configToStore = id === ADDON_IDS.LLM_PARSING
|
||||
? prepareLlmAddonConfigForWrite(data.config, JSON.parse(addon.config || '{}'))
|
||||
: data.config;
|
||||
db.prepare('UPDATE addons SET config = ? WHERE id = ?').run(JSON.stringify(configToStore), id);
|
||||
}
|
||||
} else {
|
||||
if (data.enabled !== undefined) db.prepare('UPDATE photo_providers SET enabled = ? WHERE id = ?').run(data.enabled ? 1 : 0, id);
|
||||
}
|
||||
@@ -710,7 +725,13 @@ export function updateAddon(id: string, data: { enabled?: boolean; config?: Reco
|
||||
const updatedAddon = db.prepare('SELECT * FROM addons WHERE id = ?').get(id) as Addon | undefined;
|
||||
const updatedProvider = db.prepare('SELECT * FROM photo_providers WHERE id = ?').get(id) as { id: string; name: string; description?: string | null; icon: string; enabled: number; sort_order: number } | undefined;
|
||||
const updated = updatedAddon
|
||||
? { ...updatedAddon, enabled: !!updatedAddon.enabled, config: JSON.parse(updatedAddon.config || '{}') }
|
||||
? {
|
||||
...updatedAddon,
|
||||
enabled: !!updatedAddon.enabled,
|
||||
config: updatedAddon.id === ADDON_IDS.LLM_PARSING
|
||||
? maskLlmAddonConfig(JSON.parse(updatedAddon.config || '{}'))
|
||||
: JSON.parse(updatedAddon.config || '{}'),
|
||||
}
|
||||
: updatedProvider
|
||||
? {
|
||||
id: updatedProvider.id,
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
import { maybe_encrypt_api_key, decrypt_api_key } from './apiKeyCrypto';
|
||||
|
||||
/**
|
||||
* Shared types + helpers for the `llm_parsing` addon configuration.
|
||||
*
|
||||
* Config can live in two places (resolution happens in
|
||||
* server/src/nest/llm-parse/llm-config.resolver.ts):
|
||||
* - instance-wide: the `llm_parsing` addon's `config` JSON (admin-set, wins)
|
||||
* - per-user: the `llm_*` keys in the per-user settings table (fallback)
|
||||
*
|
||||
* The API key is encrypted at rest (reusing apiKeyCrypto) and never returned to
|
||||
* the client in plaintext — it is masked with MASKED_VALUE, matching the
|
||||
* per-user encrypted-settings pattern in settingsService.ts.
|
||||
*/
|
||||
|
||||
/** Identifiers of the supported LLM backends. */
export type LlmProvider = 'local' | 'openai' | 'anthropic';

/** The resolved configuration handed to the provider clients. */
export interface ResolvedLlmConfig {
  provider: LlmProvider;
  model: string;
  baseUrl?: string;
  apiKey?: string;
  multimodal: boolean;
}

/**
 * The admin (instance-wide) configuration as persisted in `addons.config`.
 * Every field is optional; `apiKey` is stored encrypted.
 */
export interface LlmAddonConfig {
  provider?: LlmProvider;
  model?: string;
  baseUrl?: string;
  apiKey?: string;
  multimodal?: boolean;
}

/** Every accepted provider value, in declaration order. */
export const LLM_PROVIDERS: LlmProvider[] = ['local', 'openai', 'anthropic'];

/** Placeholder sent to clients in place of a stored API key. */
export const MASKED_VALUE = '••••••••';
|
||||
|
||||
/**
 * Produces the config object to persist for the LLM addon.
 *
 * - A newly entered `apiKey` is passed through `maybe_encrypt_api_key`; the
 *   stringified key is stored as-is when that call returns a nullish value.
 * - When `apiKey` is missing, null, empty, or equal to MASKED_VALUE (the client
 *   echoed the mask, i.e. the key was not changed), the `apiKey` already
 *   present in `existingStored` is carried over untouched. If there is none,
 *   the field is removed.
 *
 * @param incoming       Config submitted by the client.
 * @param existingStored Config currently persisted, if any.
 * @returns A shallow copy of `incoming` with `apiKey` resolved as described.
 */
export function prepareLlmAddonConfigForWrite(
  incoming: Record<string, unknown>,
  existingStored: Record<string, unknown> | undefined,
): Record<string, unknown> {
  const prepared: Record<string, unknown> = { ...incoming };
  const submitted = incoming.apiKey;
  const isFreshKey = submitted != null && submitted !== '' && submitted !== MASKED_VALUE;

  if (isFreshKey) {
    const plaintext = String(submitted);
    prepared.apiKey = maybe_encrypt_api_key(plaintext) ?? plaintext;
    return prepared;
  }

  // Mask echoed back or no key supplied: keep whatever is already stored.
  if (existingStored && 'apiKey' in existingStored) {
    prepared.apiKey = existingStored.apiKey;
  } else {
    delete prepared.apiKey;
  }
  return prepared;
}
|
||||
|
||||
/**
 * Hides the API key before a config is sent to a client.
 *
 * @param config Stored addon config.
 * @returns A copy with `apiKey` replaced by MASKED_VALUE when a truthy key is
 *   present; otherwise the input itself, unchanged.
 */
export function maskLlmAddonConfig(config: Record<string, unknown>): Record<string, unknown> {
  const hasKey = Boolean(config?.apiKey);
  return hasKey ? { ...config, apiKey: MASKED_VALUE } : config;
}
|
||||
|
||||
/**
 * Decrypts a stored API key for server-side use (resolver only).
 *
 * @param stored The persisted `apiKey` value.
 * @returns The result of `decrypt_api_key`, or `undefined` when `stored` is
 *   falsy or decryption yields a nullish value.
 */
export function decryptLlmApiKey(stored: unknown): string | undefined {
  if (stored) {
    const plaintext = decrypt_api_key(stored);
    return plaintext ?? undefined;
  }
  return undefined;
}
|
||||
@@ -1,10 +1,10 @@
|
||||
import { db } from '../db/database';
|
||||
import { decrypt_api_key, maybe_encrypt_api_key } from './apiKeyCrypto';
|
||||
|
||||
const ENCRYPTED_SETTING_KEYS = new Set(['webhook_url', 'ntfy_token', 'mapbox_access_token']);
|
||||
const ENCRYPTED_SETTING_KEYS = new Set(['webhook_url', 'ntfy_token', 'mapbox_access_token', 'llm_api_key']);
|
||||
// Encrypted keys that are masked (••••••••) when returned to the client.
|
||||
// Keys not in this set but in ENCRYPTED_SETTING_KEYS are decrypted and returned.
|
||||
const MASKED_SETTING_KEYS = new Set(['webhook_url', 'ntfy_token']);
|
||||
const MASKED_SETTING_KEYS = new Set(['webhook_url', 'ntfy_token', 'llm_api_key']);
|
||||
|
||||
export const DEFAULTABLE_USER_SETTING_KEYS = [
|
||||
'temperature_unit',
|
||||
@@ -22,6 +22,16 @@ export const DEFAULTABLE_USER_SETTING_KEYS = [
|
||||
'mapbox_style',
|
||||
'mapbox_3d_enabled',
|
||||
'mapbox_quality_mode',
|
||||
// Per-user LLM fallback config for booking import (used when the admin has not
|
||||
// set instance-wide config on the llm_parsing addon). See llmConfig.ts.
|
||||
'llm_provider',
|
||||
'llm_model',
|
||||
'llm_base_url',
|
||||
'llm_multimodal',
|
||||
'llm_api_key',
|
||||
// "Always retry with AI" toggle — when on, the preview auto-runs the LLM on
|
||||
// files kitinerary returns nothing for.
|
||||
'llm_always_retry',
|
||||
] as const;
|
||||
|
||||
type DefaultableKey = typeof DEFAULTABLE_USER_SETTING_KEYS[number];
|
||||
@@ -31,9 +41,10 @@ const VALID_VALUES: Partial<Record<DefaultableKey, unknown[]>> = {
|
||||
time_format: ['12h', '24h'],
|
||||
dark_mode: [true, false, 'light', 'dark', 'auto'],
|
||||
map_provider: ['leaflet', 'mapbox-gl'],
|
||||
llm_provider: ['local', 'openai', 'anthropic'],
|
||||
};
|
||||
|
||||
const BOOLEAN_KEYS = new Set<DefaultableKey>(['blur_booking_codes', 'mapbox_3d_enabled', 'mapbox_quality_mode']);
|
||||
const BOOLEAN_KEYS = new Set<DefaultableKey>(['blur_booking_codes', 'mapbox_3d_enabled', 'mapbox_quality_mode', 'llm_multimodal', 'llm_always_retry']);
|
||||
|
||||
function parseValue(raw: string): unknown {
|
||||
try { return JSON.parse(raw); } catch { return raw; }
|
||||
@@ -154,3 +165,21 @@ export function bulkUpsertSettings(userId: number, settings: Record<string, unkn
|
||||
}
|
||||
return Object.keys(settings).length;
|
||||
}
|
||||
|
||||
/**
 * Reads one per-user setting for server-side use, returning plaintext.
 *
 * Unlike getUserSettings (which MASKS encrypted keys for the client), keys in
 * ENCRYPTED_SETTING_KEYS are passed through `decrypt_api_key` here — e.g. the
 * LLM config resolver needs the real API key. Other keys are stored as JSON:
 * a JSON string is unwrapped, anything else is returned as the raw stored text.
 *
 * @param userId Owner of the setting.
 * @param key    Setting key.
 * @returns The setting value, or null when the row is missing or empty.
 */
export function getDecryptedUserSetting(userId: number, key: string): string | null {
  const statement = db.prepare('SELECT value FROM settings WHERE user_id = ? AND key = ?');
  const row = statement.get(userId, key) as { value: string } | undefined;

  const stored = row?.value;
  if (stored == null || stored === '') return null;

  if (ENCRYPTED_SETTING_KEYS.has(key)) {
    return decrypt_api_key(stored);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(stored);
  } catch {
    // Not valid JSON: hand back the stored text verbatim.
    return stored;
  }
  return typeof parsed === 'string' ? parsed : stored;
}
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { HttpException } from '@nestjs/common';
|
||||
import { BookingImportController } from '../../../../src/nest/booking-import/booking-import.controller';
|
||||
import type { BookingImportService } from '../../../../src/nest/booking-import/booking-import.service';
|
||||
import type { User } from '../../../../src/types';
|
||||
|
||||
const user = { id: 1, role: 'user' } as User;
|
||||
const file = (name = 'a.pdf') => ({ originalname: name, buffer: Buffer.from('x') } as Express.Multer.File);
|
||||
|
||||
/**
 * Builds a controller wired to a stubbed BookingImportService.
 * Every stub defaults to the "allowed / available" answer; entries in `over`
 * replace the matching defaults.
 */
function make(over: Partial<BookingImportService> = {}) {
  const defaults = {
    verifyTripAccess: vi.fn(() => ({ user_id: 1 })),
    canEdit: vi.fn(() => true),
    isAvailable: vi.fn(() => true),
    aiAvailable: vi.fn(() => true),
    preview: vi.fn(async () => ({ items: [], warnings: [], files: [] })),
  };
  const svc = { ...defaults, ...over } as unknown as BookingImportService;
  const c = new BookingImportController(svc);
  return { c, svc };
}
|
||||
|
||||
/**
 * Runs `fn`, expects it to reject with an HttpException, and resolves with
 * that exception's HTTP status. Throws 'expected throw' when `fn` succeeds.
 */
async function status(fn: () => Promise<unknown>): Promise<number> {
  let caught: unknown;
  let threw = false;
  try {
    await fn();
  } catch (e) {
    caught = e;
    threw = true;
  }
  if (!threw) throw new Error('expected throw');

  expect(caught).toBeInstanceOf(HttpException);
  return (caught as HttpException).getStatus();
}
|
||||
|
||||
beforeEach(() => vi.clearAllMocks());
|
||||
|
||||
describe('BookingImportController.preview', () => {
|
||||
it('rejects an invalid mode with 400', async () => {
|
||||
const { c } = make();
|
||||
expect(await status(() => c.preview(user, 't1', [file()], 'bogus'))).toBe(400);
|
||||
});
|
||||
|
||||
it('returns 409 for force-ai when AI is not configured', async () => {
|
||||
const { c } = make({ aiAvailable: vi.fn(() => false) as any });
|
||||
expect(await status(() => c.preview(user, 't1', [file()], 'force-ai'))).toBe(409);
|
||||
});
|
||||
|
||||
it('returns 503 for no-ai when the extractor is unavailable', async () => {
|
||||
const { c } = make({ isAvailable: vi.fn(() => false) as any });
|
||||
expect(await status(() => c.preview(user, 't1', [file()], 'no-ai'))).toBe(503);
|
||||
});
|
||||
|
||||
it('returns 400 when no files are uploaded', async () => {
|
||||
const { c } = make();
|
||||
expect(await status(() => c.preview(user, 't1', [], 'no-ai'))).toBe(400);
|
||||
});
|
||||
|
||||
it('passes the parsed mode and user id through to the service', async () => {
|
||||
const { c, svc } = make();
|
||||
await c.preview(user, 't1', [file()], 'fallback-on-empty');
|
||||
expect(svc.preview).toHaveBeenCalledWith([expect.anything()], 'fallback-on-empty', 1);
|
||||
});
|
||||
|
||||
it('defaults the mode to no-ai when omitted', async () => {
|
||||
const { c, svc } = make();
|
||||
await c.preview(user, 't1', [file()], undefined);
|
||||
expect(svc.preview).toHaveBeenCalledWith([expect.anything()], 'no-ai', 1);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,79 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { HttpException } from '@nestjs/common';
|
||||
|
||||
// Mock the heavy side-effect imports so the service module loads cleanly; the
|
||||
// preview() path under test only touches the extractor + llmParse deps.
|
||||
vi.mock('../../../../src/db/database', () => ({ db: { prepare: vi.fn() }, closeDb: () => {}, reinitialize: () => {} }));
|
||||
vi.mock('../../../../src/websocket', () => ({ broadcast: vi.fn() }));
|
||||
vi.mock('../../../../src/services/permissions', () => ({ checkPermission: vi.fn(() => true) }));
|
||||
vi.mock('../../../../src/services/tripAccess', () => ({ verifyTripAccess: vi.fn() }));
|
||||
vi.mock('../../../../src/services/reservationService', () => ({ createReservation: vi.fn() }));
|
||||
vi.mock('../../../../src/services/placeService', () => ({ createPlace: vi.fn() }));
|
||||
vi.mock('../../../../src/services/mapsService', () => ({ searchNominatim: vi.fn() }));
|
||||
|
||||
import { BookingImportService } from '../../../../src/nest/booking-import/booking-import.service';
|
||||
|
||||
const HOTEL_KI = { '@type': 'LodgingReservation', reservationNumber: 'ABC', reservationFor: { name: 'Hotel X' }, checkinTime: '2026-06-11T15:00', checkoutTime: '2026-06-12T11:00' };
|
||||
const file = (name = 'a.pdf') => ({ buffer: Buffer.from('x'), originalname: name } as any);
|
||||
|
||||
/**
 * Builds a BookingImportService with stubbed extractor and LLM parser.
 *
 * - `kit` / `ai` set what each stub's `isAvailable()` reports (default false).
 * - `extract` / `parse` replace the stub implementations; by default they
 *   resolve with an empty result.
 */
function make(opts: { kit?: boolean; ai?: boolean; extract?: any; parse?: any }) {
  const emptyExtract = async () => [];
  const emptyParse = async () => ({ kiItems: [], warnings: [] });

  const extractor = {
    isAvailable: () => opts.kit ?? false,
    extract: vi.fn(opts.extract ?? emptyExtract),
  };
  const llmParse = {
    isAvailable: () => opts.ai ?? false,
    parse: vi.fn(opts.parse ?? emptyParse),
  };

  const svc = new BookingImportService(extractor as any, llmParse as any);
  return { svc, extractor, llmParse };
}
|
||||
|
||||
beforeEach(() => vi.clearAllMocks());
|
||||
|
||||
describe('BookingImportService.preview', () => {
|
||||
it('no-ai: maps kitinerary items, does not force needs_review, reports aiUsed:false', async () => {
|
||||
const { svc, llmParse } = make({ kit: true, ai: false, extract: async () => [HOTEL_KI] });
|
||||
const res = await svc.preview([file()], 'no-ai', 1);
|
||||
expect(res.items).toHaveLength(1);
|
||||
expect(res.items[0].needs_review).toBeFalsy();
|
||||
expect(res.files).toEqual([{ fileName: 'a.pdf', aiAvailable: false, aiUsed: false }]);
|
||||
expect(llmParse.parse).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('throws 503 when neither parser is available', async () => {
|
||||
const { svc } = make({ kit: false, ai: false });
|
||||
try {
|
||||
await svc.preview([file()], 'no-ai', 1);
|
||||
throw new Error('expected throw');
|
||||
} catch (err) {
|
||||
expect(err).toBeInstanceOf(HttpException);
|
||||
expect((err as HttpException).getStatus()).toBe(503);
|
||||
}
|
||||
});
|
||||
|
||||
it('fallback-on-empty: runs the LLM when kitinerary finds nothing and flags needs_review', async () => {
|
||||
const { svc, extractor, llmParse } = make({
|
||||
kit: true, ai: true,
|
||||
extract: async () => [],
|
||||
parse: async () => ({ kiItems: [HOTEL_KI], warnings: [] }),
|
||||
});
|
||||
const res = await svc.preview([file()], 'fallback-on-empty', 1);
|
||||
expect(extractor.extract).toHaveBeenCalled();
|
||||
expect(llmParse.parse).toHaveBeenCalled();
|
||||
expect(res.items).toHaveLength(1);
|
||||
expect(res.items[0].needs_review).toBe(true);
|
||||
expect(res.files![0]).toEqual({ fileName: 'a.pdf', aiAvailable: true, aiUsed: true });
|
||||
});
|
||||
|
||||
it('fallback-on-empty: skips the LLM when kitinerary already found items', async () => {
|
||||
const { svc, llmParse } = make({ kit: true, ai: true, extract: async () => [HOTEL_KI] });
|
||||
const res = await svc.preview([file()], 'fallback-on-empty', 1);
|
||||
expect(llmParse.parse).not.toHaveBeenCalled();
|
||||
expect(res.files![0].aiUsed).toBe(false);
|
||||
});
|
||||
|
||||
it('force-ai: skips kitinerary entirely and uses the LLM', async () => {
|
||||
const { svc, extractor, llmParse } = make({
|
||||
kit: true, ai: true,
|
||||
parse: async () => ({ kiItems: [HOTEL_KI], warnings: [] }),
|
||||
});
|
||||
const res = await svc.preview([file()], 'force-ai', 1);
|
||||
expect(extractor.extract).not.toHaveBeenCalled();
|
||||
expect(llmParse.parse).toHaveBeenCalled();
|
||||
expect(res.items[0].needs_review).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,96 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { OpenAiCompatibleClient } from '../../../../src/nest/llm-parse/clients/openai-compatible.client';
|
||||
import { AnthropicClient } from '../../../../src/nest/llm-parse/clients/anthropic.client';
|
||||
import type { LlmExtractionInput } from '../../../../src/nest/llm-parse/llm-provider.interface';
|
||||
|
||||
const baseInput: LlmExtractionInput = {
|
||||
prompt: 'system',
|
||||
jsonSchema: { type: 'object' },
|
||||
model: 'm',
|
||||
text: 'Flight AB123',
|
||||
};
|
||||
|
||||
function mockFetch(impl: (url: string, init: RequestInit) => Promise<Response> | Response) {
|
||||
const fn = vi.fn(impl as any);
|
||||
vi.stubGlobal('fetch', fn);
|
||||
return fn;
|
||||
}
|
||||
|
||||
/**
 * Creates a minimal fetch Response stand-in exposing only `ok`, `status`,
 * `json()` and `text()`. `text()` resolves with the JSON-serialized body.
 */
function jsonResponse(body: unknown, ok = true, status = 200): Response {
  const fake = {
    ok,
    status,
    json: async () => body,
    text: async () => JSON.stringify(body),
  };
  return fake as unknown as Response;
}
|
||||
|
||||
beforeEach(() => vi.unstubAllGlobals());
|
||||
|
||||
describe('OpenAiCompatibleClient', () => {
|
||||
it('posts to {baseUrl}/chat/completions and returns the reservations array', async () => {
|
||||
const fetchFn = mockFetch(() =>
|
||||
jsonResponse({ choices: [{ message: { content: JSON.stringify({ reservations: [{ '@type': 'FlightReservation' }] }) } }] }),
|
||||
);
|
||||
const out = await new OpenAiCompatibleClient().extract({ ...baseInput, baseUrl: 'http://localhost:11434/v1/' });
|
||||
expect(out).toEqual([{ '@type': 'FlightReservation' }]);
|
||||
expect(fetchFn.mock.calls[0][0]).toBe('http://localhost:11434/v1/chat/completions');
|
||||
});
|
||||
|
||||
it('tolerates code-fenced JSON', async () => {
|
||||
mockFetch(() =>
|
||||
jsonResponse({ choices: [{ message: { content: '```json\n{"reservations":[{"@type":"TrainReservation"}]}\n```' } }] }),
|
||||
);
|
||||
const out = await new OpenAiCompatibleClient().extract(baseInput);
|
||||
expect(out).toEqual([{ '@type': 'TrainReservation' }]);
|
||||
});
|
||||
|
||||
it('returns [] on malformed content', async () => {
|
||||
mockFetch(() => jsonResponse({ choices: [{ message: { content: 'not json' } }] }));
|
||||
expect(await new OpenAiCompatibleClient().extract(baseInput)).toEqual([]);
|
||||
});
|
||||
|
||||
it('throws on non-2xx', async () => {
|
||||
mockFetch(() => jsonResponse({ error: 'bad' }, false, 401));
|
||||
await expect(new OpenAiCompatibleClient().extract(baseInput)).rejects.toThrow(/401/);
|
||||
});
|
||||
|
||||
it('sends an image natively as image_url but never a file/pdf part', async () => {
|
||||
const fetchFn = mockFetch(() => jsonResponse({ choices: [{ message: { content: '{"reservations":[]}' } }] }));
|
||||
await new OpenAiCompatibleClient().extract({ ...baseInput, file: { mimeType: 'image/png', data: Buffer.from('IMG') } });
|
||||
let parts = JSON.parse((fetchFn.mock.calls[0][1] as RequestInit).body as string).messages[1].content;
|
||||
expect(parts.some((p: any) => p.type === 'image_url')).toBe(true);
|
||||
expect(parts.some((p: any) => p.type === 'file')).toBe(false);
|
||||
|
||||
// A PDF must NOT be sent as a content part (Ollama rejects it).
|
||||
await new OpenAiCompatibleClient().extract({ ...baseInput, file: { mimeType: 'application/pdf', data: Buffer.from('PDF') } });
|
||||
parts = JSON.parse((fetchFn.mock.calls[1][1] as RequestInit).body as string).messages[1].content;
|
||||
expect(parts.every((p: any) => p.type !== 'file' && p.type !== 'image_url')).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('AnthropicClient', () => {
|
||||
it('forces the emit_reservations tool and reads its input', async () => {
|
||||
const fetchFn = mockFetch(() =>
|
||||
jsonResponse({ stop_reason: 'tool_use', content: [{ type: 'tool_use', name: 'emit_reservations', input: { reservations: [{ '@type': 'LodgingReservation' }] } }] }),
|
||||
);
|
||||
const out = await new AnthropicClient().extract(baseInput);
|
||||
expect(out).toEqual([{ '@type': 'LodgingReservation' }]);
|
||||
const body = JSON.parse((fetchFn.mock.calls[0][1] as RequestInit).body as string);
|
||||
expect(body.tool_choice).toEqual({ type: 'tool', name: 'emit_reservations' });
|
||||
expect(body.tools[0].name).toBe('emit_reservations');
|
||||
});
|
||||
|
||||
it('throws on a refusal stop_reason', async () => {
|
||||
mockFetch(() => jsonResponse({ stop_reason: 'refusal', content: [] }));
|
||||
await expect(new AnthropicClient().extract(baseInput)).rejects.toThrow(/declined/i);
|
||||
});
|
||||
|
||||
it('throws on non-2xx', async () => {
|
||||
mockFetch(() => jsonResponse({ error: 'bad' }, false, 500));
|
||||
await expect(new AnthropicClient().extract(baseInput)).rejects.toThrow(/500/);
|
||||
});
|
||||
|
||||
it('sends a native pdf as a base64 document block', async () => {
|
||||
const fetchFn = mockFetch(() => jsonResponse({ content: [{ type: 'tool_use', name: 'emit_reservations', input: { reservations: [] } }] }));
|
||||
await new AnthropicClient().extract({ ...baseInput, file: { mimeType: 'application/pdf', data: Buffer.from('PDF') } });
|
||||
const body = JSON.parse((fetchFn.mock.calls[0][1] as RequestInit).body as string);
|
||||
const blocks = body.messages[0].content;
|
||||
expect(blocks.some((b: any) => b.type === 'document' && b.source.type === 'base64')).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,67 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
|
||||
const { dbMock } = vi.hoisted(() => {
|
||||
const stmt = { get: vi.fn() };
|
||||
return { dbMock: { prepare: vi.fn(() => stmt), _stmt: stmt } };
|
||||
});
|
||||
vi.mock('../../../../src/db/database', () => ({ db: dbMock, closeDb: () => {}, reinitialize: () => {} }));
|
||||
|
||||
const { isAddonEnabled } = vi.hoisted(() => ({ isAddonEnabled: vi.fn() }));
|
||||
vi.mock('../../../../src/services/adminService', () => ({ isAddonEnabled }));
|
||||
|
||||
const { getUserSettings, getDecryptedUserSetting } = vi.hoisted(() => ({
|
||||
getUserSettings: vi.fn(() => ({}) as Record<string, unknown>),
|
||||
getDecryptedUserSetting: vi.fn(() => null as string | null),
|
||||
}));
|
||||
vi.mock('../../../../src/services/settingsService', () => ({ getUserSettings, getDecryptedUserSetting }));
|
||||
|
||||
import { resolveLlmConfig } from '../../../../src/nest/llm-parse/llm-config.resolver';
|
||||
|
||||
/**
 * Sets what the mocked statement's `get()` returns: a `{ config }` row holding
 * the JSON-serialized value, or `undefined` (no row) when `config` is undefined.
 */
function setInstanceConfig(config: unknown) {
  const row = config === undefined ? undefined : { config: JSON.stringify(config) };
  dbMock._stmt.get.mockReturnValue(row);
}
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
isAddonEnabled.mockReturnValue(true);
|
||||
setInstanceConfig(undefined);
|
||||
getUserSettings.mockReturnValue({});
|
||||
getDecryptedUserSetting.mockReturnValue(null);
|
||||
});
|
||||
|
||||
describe('resolveLlmConfig', () => {
|
||||
it('returns null when the addon is disabled', () => {
|
||||
isAddonEnabled.mockReturnValue(false);
|
||||
expect(resolveLlmConfig(1)).toBeNull();
|
||||
});
|
||||
|
||||
it('uses instance config when present (and decrypts the key)', () => {
|
||||
setInstanceConfig({ provider: 'anthropic', model: 'claude-opus-4-8', apiKey: 'sk-plain', multimodal: true });
|
||||
expect(resolveLlmConfig(1)).toEqual({
|
||||
provider: 'anthropic',
|
||||
model: 'claude-opus-4-8',
|
||||
baseUrl: undefined,
|
||||
apiKey: 'sk-plain',
|
||||
multimodal: true,
|
||||
});
|
||||
});
|
||||
|
||||
it('falls back to per-user config when instance config is incomplete', () => {
|
||||
setInstanceConfig({ provider: 'anthropic' }); // no model → not usable
|
||||
getUserSettings.mockReturnValue({ llm_provider: 'local', llm_model: 'nuextract', llm_base_url: 'http://x/v1', llm_multimodal: true });
|
||||
getDecryptedUserSetting.mockReturnValue('user-key');
|
||||
expect(resolveLlmConfig(7)).toEqual({
|
||||
provider: 'local',
|
||||
model: 'nuextract',
|
||||
baseUrl: 'http://x/v1',
|
||||
apiKey: 'user-key',
|
||||
multimodal: true,
|
||||
});
|
||||
expect(getDecryptedUserSetting).toHaveBeenCalledWith(7, 'llm_api_key');
|
||||
});
|
||||
|
||||
it('returns null when neither instance nor user config is usable', () => {
|
||||
getUserSettings.mockReturnValue({ llm_provider: 'openai' }); // no model
|
||||
expect(resolveLlmConfig(1)).toBeNull();
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,60 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { HttpException } from '@nestjs/common';
|
||||
import { LlmLocalService } from '../../../../src/nest/llm-parse/llm-local.service';
|
||||
|
||||
/** Returns a fresh `LlmLocalService` so no state is shared between assertions. */
function svc() {
  return new LlmLocalService();
}
|
||||
|
||||
/**
 * Stubs the global `fetch` with a vitest mock built from `impl`.
 *
 * @param impl implementation handed to `vi.fn`
 * @returns the mock, so tests can inspect its recorded calls
 */
function mockFetch(impl: any) {
  const fetchMock = vi.fn(impl);
  vi.stubGlobal('fetch', fetchMock);
  return fetchMock;
}
|
||||
|
||||
// Drop any `fetch` stub installed by a previous test before the next one runs.
beforeEach(() => {
  vi.unstubAllGlobals();
});
|
||||
|
||||
// Base-URL normalisation: trailing `/v1` and slashes removed, default applied, bad URLs rejected.
describe('LlmLocalService.ollamaRoot', () => {
  it('strips a trailing /v1 and slashes', () => {
    const cases: [string, string][] = [
      ['http://localhost:11434/v1', 'http://localhost:11434'],
      ['http://localhost:11434/v1/', 'http://localhost:11434'],
      ['http://host:1/', 'http://host:1'],
    ];

    for (const [input, expected] of cases) {
      expect(svc().ollamaRoot(input)).toBe(expected);
    }
  });

  it('defaults when no base URL is given', () => {
    const root = svc().ollamaRoot(undefined);

    expect(root).toBe('http://localhost:11434');
  });

  it('rejects non-http(s) and invalid URLs', () => {
    for (const bad of ['ftp://x', 'not a url']) {
      expect(() => svc().ollamaRoot(bad)).toThrow(HttpException);
    }
  });
});
|
||||
|
||||
// Model listing against a stubbed `fetch`: success path and unreachable server.
describe('LlmLocalService.listModels', () => {
  it('returns named models from /api/tags', async () => {
    const fetchFn = mockFetch(async () => ({
      ok: true,
      // The second entry has an empty name and is expected to be filtered out.
      json: async () => ({ models: [{ name: 'nuextract', size: 100 }, { name: '' }] }),
    }));

    const { models } = await svc().listModels('http://localhost:11434/v1');

    expect(models).toEqual([{ name: 'nuextract', size: 100 }]);
    const [requestedUrl] = fetchFn.mock.calls[0];
    expect(requestedUrl).toBe('http://localhost:11434/api/tags');
  });

  it('502s when the server is unreachable', async () => {
    mockFetch(async () => {
      throw new Error('ECONNREFUSED');
    });

    const attempt = svc().listModels('http://localhost:11434');

    await expect(attempt).rejects.toThrow(HttpException);
  });
});
|
||||
|
||||
// Model pull: input validation, then the request sent to the stubbed `fetch`.
describe('LlmLocalService.pull', () => {
  it('requires a model', async () => {
    const attempt = svc().pull('http://localhost:11434', '');

    await expect(attempt).rejects.toThrow(HttpException);
  });

  it('posts to /api/pull and returns the stream body', async () => {
    // Only identity matters here, so an empty object stands in for the stream.
    const streamBody = {} as ReadableStream<Uint8Array>;
    const fetchFn = mockFetch(async () => ({ ok: true, body: streamBody }));

    const out = await svc().pull('http://localhost:11434/v1', 'nuextract');

    expect(out).toBe(streamBody);
    const [url, init] = fetchFn.mock.calls[0];
    expect(url).toBe('http://localhost:11434/api/pull');
    expect(JSON.parse(init.body)).toEqual({ model: 'nuextract', stream: true });
  });
});
|
||||
@@ -0,0 +1,116 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
|
||||
// Config resolver: replaced wholesale so each test chooses what config resolves.
const { resolveLlmConfig } = vi.hoisted(() => {
  return { resolveLlmConfig: vi.fn() };
});
vi.mock('../../../../src/nest/llm-parse/llm-config.resolver', () => {
  return { resolveLlmConfig };
});

// Client factory: always hands back a client whose `extract` is the shared mock below.
const { createLlmClient, extract } = vi.hoisted(() => {
  const extractFn = vi.fn();
  const factory = vi.fn(() => ({ extract: extractFn }));
  return { createLlmClient: factory, extract: extractFn };
});
vi.mock('../../../../src/nest/llm-parse/llm-client.factory', () => {
  return { createLlmClient };
});

// Text extraction: only `extractText` is overridden; every other export stays real.
const { extractText } = vi.hoisted(() => {
  return { extractText: vi.fn(async () => 'Flight AB123') };
});
vi.mock('../../../../src/nest/llm-parse/text-extract', async (importOriginal) => ({
  ...(await importOriginal() as Record<string, unknown>),
  extractText,
}));
|
||||
|
||||
import { LlmParseService } from '../../../../src/nest/llm-parse/llm-parse.service';
|
||||
|
||||
/**
 * Builds the config object the mocked resolver returns.
 *
 * @param over properties that override or extend the defaults
 *   (`provider: 'openai'`, `model: 'm'`, `multimodal: false`)
 */
function cfg(over: Record<string, unknown> = {}) {
  const defaults = { provider: 'openai', model: 'm', multimodal: false };
  return { ...defaults, ...over };
}
|
||||
/** Returns a fresh `LlmParseService` for each call. */
function svc() {
  return new LlmParseService();
}
|
||||
/**
 * Builds the upload-like object passed to `parse()` in these tests.
 *
 * @param name value stored as `originalName`
 * @param body text encoded into `buffer`; defaults to `'Flight AB123'`
 */
function file(name: string, body = 'Flight AB123') {
  const buffer = Buffer.from(body);
  return { buffer, originalName: name };
}
|
||||
|
||||
// Happy-path defaults restored before every test: a config resolves, text
// extraction yields 'Flight AB123', and the client returns one flight reservation.
beforeEach(() => {
  vi.clearAllMocks();

  extractText.mockResolvedValue('Flight AB123');
  extract.mockResolvedValue([{ '@type': 'FlightReservation' }]);
  resolveLlmConfig.mockReturnValue(cfg());
});
|
||||
|
||||
// Exercises LlmParseService with the resolver, client factory and text extraction mocked.
describe('LlmParseService', () => {
  // First argument of the first `extract` call, i.e. what the service passed to the client.
  const sentInput = () => extract.mock.calls[0][0];

  it('isAvailable reflects whether a config resolves', () => {
    // Only the first lookup yields null; the second falls back to the beforeEach default.
    resolveLlmConfig.mockReturnValueOnce(null);

    const withoutConfig = svc().isAvailable(1);
    const withConfig = svc().isAvailable(1);

    expect(withoutConfig).toBe(false);
    expect(withConfig).toBe(true);
  });

  it('returns a not-configured warning when no config resolves', async () => {
    resolveLlmConfig.mockReturnValue(null);

    const { kiItems, warnings } = await svc().parse(file('a.txt'), 1);

    expect(kiItems).toEqual([]);
    expect(warnings[0]).toMatch(/not configured/i);
    expect(extract).not.toHaveBeenCalled();
  });

  it('sends extracted text for a text-like file', async () => {
    const { kiItems } = await svc().parse(file('a.txt'), 1);

    expect(kiItems).toEqual([{ '@type': 'FlightReservation' }]);
    const input = sentInput();
    expect(input.text).toBe('Flight AB123');
    expect(input.file).toBeUndefined();
  });

  it('extracts text for a pdf on the OpenAI-compatible/local path (no native bytes)', async () => {
    extractText.mockResolvedValue('Hotel X');

    await svc().parse(file('a.pdf', '%PDF'), 1);

    const input = sentInput();
    expect(input.text).toBe('Hotel X');
    expect(input.file).toBeUndefined();
  });

  it('sends a pdf as native bytes only for Anthropic', async () => {
    resolveLlmConfig.mockReturnValue(cfg({ provider: 'anthropic' }));

    await svc().parse(file('a.pdf', '%PDF'), 1);

    const input = sentInput();
    expect(input.file).toEqual({ mimeType: 'application/pdf', data: expect.any(Buffer) });
    expect(input.text).toBeUndefined();
    expect(extractText).not.toHaveBeenCalled();
  });

  it('warns when a pdf yields no readable text (e.g. a scan)', async () => {
    extractText.mockResolvedValue(' ');

    const { kiItems, warnings } = await svc().parse(file('a.pdf', '%PDF'), 1);

    expect(kiItems).toEqual([]);
    expect(warnings[0]).toMatch(/no readable text/i);
    expect(extract).not.toHaveBeenCalled();
  });

  it('folds flattened type fields into reservationFor (small-model output)', async () => {
    const flattened = {
      '@type': 'FlightReservation',
      reservationNumber: 'ABC',
      flightNumber: 'EZY1357',
      airline: { iataCode: 'EG' },
      departureAirport: { iataCode: 'GEG' },
      arrivalAirport: { iataCode: 'AMS' },
      departureTime: '2026-06-11T10:00:00',
    };
    extract.mockResolvedValue([flattened]);

    const { kiItems } = await svc().parse(file('a.txt'), 1);
    const item = kiItems[0] as any;

    expect(item.reservationNumber).toBe('ABC');
    expect(item.reservationFor).toMatchObject({ flightNumber: 'EZY1357', departureAirport: { iataCode: 'GEG' } });
    // root-level keys are not duplicated into reservationFor
    expect(item.reservationFor.reservationNumber).toBeUndefined();
  });

  it('leaves already-nested reservationFor untouched', async () => {
    extract.mockResolvedValue([{ '@type': 'FlightReservation', reservationFor: { flightNumber: 'X1' } }]);

    const { kiItems } = await svc().parse(file('a.txt'), 1);

    expect((kiItems[0] as any).reservationFor).toEqual({ flightNumber: 'X1' });
  });

  it('drops nodes without a string @type and warns', async () => {
    extract.mockResolvedValue([{ '@type': 'FlightReservation' }, { foo: 'bar' }]);

    const { kiItems, warnings } = await svc().parse(file('a.txt'), 1);

    expect(kiItems).toEqual([{ '@type': 'FlightReservation' }]);
    const warnedAboutUnrecognized = warnings.some((w) => /unrecognized/i.test(w));
    expect(warnedAboutUnrecognized).toBe(true);
  });

  it('degrades to a warning when the client throws', async () => {
    extract.mockRejectedValue(new Error('boom'));

    const { kiItems, warnings } = await svc().parse(file('a.txt'), 1);

    expect(kiItems).toEqual([]);
    expect(warnings[0]).toMatch(/AI parsing failed/i);
  });
});
|
||||
@@ -0,0 +1,26 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { buildSystemPrompt, KI_RESERVATION_JSON_SCHEMA } from '../../../../src/nest/llm-parse/llm-prompt';
|
||||
import { KI_RESERVATION_TYPES } from '@trek/shared';
|
||||
|
||||
// Checks that the system prompt and the JSON schema stay in step with KI_RESERVATION_TYPES.
describe('llm-prompt', () => {
  it('names every recognized @type the mapper supports', () => {
    const prompt = buildSystemPrompt();

    for (const typeName of KI_RESERVATION_TYPES) {
      expect(prompt).toContain(typeName);
    }
  });

  it('instructs JSON-only output wrapped in reservations', () => {
    const prompt = buildSystemPrompt();
    const lowered = prompt.toLowerCase();

    expect(prompt).toMatch(/"reservations"/);
    expect(lowered).toContain('iso 8601');
  });

  it('exposes a strict-safe object-root JSON schema enumerating the types', () => {
    const schema = KI_RESERVATION_JSON_SCHEMA as any;

    // Root: a closed object that requires `reservations`.
    expect(schema.type).toBe('object');
    expect(schema.additionalProperties).toBe(false);
    expect(schema.required).toContain('reservations');

    // Items: `@type` is required and limited to the shared list of types.
    const itemSchema = schema.properties.reservations.items;
    expect(itemSchema.properties['@type'].enum).toEqual([...KI_RESERVATION_TYPES]);
    expect(itemSchema.required).toContain('@type');
  });
});
|
||||
@@ -0,0 +1,40 @@
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
|
||||
// Stand-in for `pdf-parse`: `getText` is shared across instances so the test can
// assert on it, while each instance gets its own no-op `destroy`.
const { getText } = vi.hoisted(() => {
  const getText = vi.fn(async () => ({ text: 'Hotel X — confirmation ABC' }));
  return { getText };
});
vi.mock('pdf-parse', () => {
  class PDFParse {
    getText = getText;
    destroy = vi.fn(async () => {});
  }
  return { PDFParse };
});
|
||||
|
||||
import { isTextLike, isPdf, extractText } from '../../../../src/nest/llm-parse/text-extract';
|
||||
|
||||
// File-type classification plus text extraction for plain text, HTML and (mocked) PDF input.
describe('text-extract', () => {
  it('classifies text-like and pdf extensions', () => {
    for (const name of ['a.txt', 'a.html', 'a.eml']) {
      expect(isTextLike(name)).toBe(true);
    }
    expect(isTextLike('a.pdf')).toBe(false);

    // The upper-case extension is still expected to count as a PDF.
    expect(isPdf('a.PDF')).toBe(true);
    expect(isPdf('a.txt')).toBe(false);
  });

  it('decodes plain text', async () => {
    const decoded = await extractText(Buffer.from('hello world'), 'a.txt');

    expect(decoded).toBe('hello world');
  });

  it('strips markup from html/eml', async () => {
    const markup = '<html><style>x{}</style><body><p>Flight AB123</p><script>1</script></body></html>';

    const stripped = await extractText(Buffer.from(markup), 'a.html');

    expect(stripped).toContain('Flight AB123');
    expect(stripped).not.toContain('<p>');
    expect(stripped).not.toContain('x{}');
  });

  it('extracts the embedded text layer from a pdf', async () => {
    const extracted = await extractText(Buffer.from('%PDF-1.4'), 'a.pdf');

    expect(extracted).toBe('Hotel X — confirmation ABC');
    expect(getText).toHaveBeenCalled();
  });
});
|
||||
Reference in New Issue
Block a user