mirror of
https://github.com/mauriceboe/TREK.git
synced 2026-06-29 10:11:46 +00:00
test(llm-parse): cover the extraction router, client factory and import jobs
The new LLM extraction router shipped with little branch coverage, dropping src/nest below the 80% gate. Add unit tests for routeExtraction (flights/single/union/error paths, deterministic booking-wide fill), the native Ollama format client, the provider factory, the local-router service path with its type-aware text cap, the flat->schema.org mapper's remaining reservation types, and the background import-jobs runner. Also remove the now-unused validate.ts (only its FlatLike type was still referenced; moved to flat-schemas).
This commit is contained in:
@@ -20,9 +20,8 @@
|
||||
|
||||
import type { KiReservation } from '../../booking-import/kitinerary.types';
|
||||
import { nuExtractToKiReservations } from '../clients/nuextract';
|
||||
import { FLAT_SCHEMA_BY_TYPE, FLIGHTS_ARRAY_SCHEMA, UNION_SINGLE_SCHEMA, type FlatType } from './flat-schemas';
|
||||
import { FLAT_SCHEMA_BY_TYPE, FLIGHTS_ARRAY_SCHEMA, UNION_SINGLE_SCHEMA, type FlatType, type FlatLike } from './flat-schemas';
|
||||
import { extractEnforced } from './ollama-format.client';
|
||||
import type { FlatLike } from './validate';
|
||||
|
||||
export interface RouterContext {
|
||||
baseUrl: string;
|
||||
|
||||
@@ -19,6 +19,23 @@ export type FlatType = 'flight' | 'train' | 'bus' | 'ferry' | 'car' | 'hotel' |
|
||||
|
||||
export const FLAT_TYPES: FlatType[] = ['flight', 'train', 'bus', 'ferry', 'car', 'hotel', 'restaurant', 'event'];
|
||||
|
||||
/**
 * A flat reservation as the model emits it, before mapping to schema.org.
 *
 * The named fields are the ones the router reads directly; the index signature
 * carries every other model-emitted field through unchanged.
 */
export interface FlatLike {
  /** Reservation kind; one of the `FlatType` literals. */
  type: FlatType;
  booking_reference?: string;
  vehicle_number?: string;
  from_code?: string;
  to_code?: string;
  from_name?: string;
  to_name?: string;
  departure_time?: string;
  arrival_time?: string;
  checkin_time?: string;
  checkout_time?: string;
  /** Any further fields, passed along untouched. */
  [k: string]: unknown;
}
|
||||
|
||||
type JsonSchema = Record<string, unknown>;
|
||||
|
||||
const STR = { type: 'string' } as const;
|
||||
|
||||
@@ -1,102 +0,0 @@
|
||||
/**
|
||||
* Schicht 2 — semantic validation of an extracted flat reservation.
|
||||
*
|
||||
* Constrained decoding guarantees the JSON is structurally valid, but NOT that the
|
||||
* values make sense. This layer catches the failure modes that actually hurt users —
|
||||
* a date with no day, a check-out before check-in, a bogus IATA code, a missing
|
||||
* booking reference — and returns a human-readable problem list. The router feeds that
|
||||
* list back to the model for ONE targeted repair pass; whatever still fails is left for
|
||||
* the human (the review-before-save modal, Schicht 3) rather than silently dropped.
|
||||
*/
|
||||
|
||||
import { findByIata } from '../../../services/airportService';
|
||||
import type { FlatType } from './flat-schemas';
|
||||
|
||||
/**
 * Whether a value contains a full calendar date (YYYY-MM-DD), not just a time.
 *
 * @param v - Any value; non-strings are never considered dates.
 * @returns `true` when `v` is a string with a `dddd-dd-dd` digit group somewhere in it.
 *   Only the shape is checked — the digits are not validated as a real calendar date.
 */
function hasFullDate(v: unknown): boolean {
  return typeof v === 'string' && /\d{4}-\d{2}-\d{2}/.test(v);
}
|
||||
|
||||
/**
 * Extract the YYYY-MM-DD portion of a value.
 *
 * @param v - Any value; non-strings yield `null`.
 * @returns The first `dddd-dd-dd` substring found in `v`, or `null` when there is none.
 */
function datePart(v: unknown): string | null {
  if (typeof v !== 'string') return null;
  const m = v.match(/\d{4}-\d{2}-\d{2}/);
  return m ? m[0] : null;
}
|
||||
|
||||
/**
 * Shape check for an IATA airport code: exactly three ASCII letters (either case),
 * ignoring surrounding whitespace. Does not check that the code actually exists.
 *
 * @param v - Any value; non-strings never match.
 */
function looksLikeIata(v: unknown): boolean {
  return typeof v === 'string' && /^[A-Za-z]{3}$/.test(v.trim());
}
|
||||
|
||||
/**
 * A flat reservation as handed to the semantic validator. The named optional fields
 * are the ones `validateFlat` inspects; the index signature admits any other field.
 */
export interface FlatLike {
  /** Reservation kind; selects which checks apply. */
  type: FlatType;
  booking_reference?: string;
  vehicle_number?: string;
  from_code?: string;
  to_code?: string;
  from_name?: string;
  to_name?: string;
  departure_time?: string;
  arrival_time?: string;
  checkin_time?: string;
  checkout_time?: string;
  [k: string]: unknown;
}
|
||||
|
||||
const TRANSPORT: FlatType[] = ['flight', 'train', 'bus', 'ferry'];
|
||||
|
||||
/**
 * Return a list of human-readable problems with a flat reservation, suitable for a
 * repair prompt. An empty list means it passed.
 *
 * Checks performed, by reservation type:
 * - transport (flight/train/bus/ferry): a departure and an arrival location, a
 *   `departure_time` containing a full date, and arrival not before departure;
 * - flight additionally: a flight number, and airport codes that are 3-letter IATA
 *   codes known to `findByIata`;
 * - hotel: check-in/check-out containing a full date, check-out not before check-in;
 * - car: pickup (`departure_time`) and return (`arrival_time`) containing a full date.
 *
 * @param flat - The reservation to check.
 * @param requireReference - When `true` (the default), a missing booking code is
 *   reported (bookings almost always carry one — a miss usually means the model
 *   skipped it, not that it's absent).
 * @returns The problems found, in check order.
 */
export function validateFlat(flat: FlatLike, requireReference = true): string[] {
  const problems: string[] = [];
  const t = flat.type;

  if (requireReference && !str(flat.booking_reference)) {
    problems.push('the booking/confirmation reference is missing — copy it from the document');
  }

  if (TRANSPORT.includes(t)) {
    if (!str(flat.from_code) && !str(flat.from_name)) problems.push('missing departure location');
    if (!str(flat.to_code) && !str(flat.to_name)) problems.push('missing arrival location');
    if (!hasFullDate(flat.departure_time)) {
      problems.push("departure_time must be a full date-time (YYYY-MM-DDTHH:MM:00) using THIS segment's date");
    }
    if (t === 'flight') {
      if (!str(flat.vehicle_number)) problems.push('missing flight number');
      for (const [label, code] of [['departure', flat.from_code], ['arrival', flat.to_code]] as const) {
        if (!str(code)) continue; // an absent code is covered by the location checks above
        if (!looksLikeIata(code)) {
          problems.push(`${label} airport code "${String(code)}" is not a 3-letter IATA code`);
          continue;
        }
        // looksLikeIata tolerates surrounding whitespace, so normalise the same way
        // before the lookup — otherwise a padded but valid code is reported as unknown.
        const iata = String(code).trim().toUpperCase();
        if (!findByIata(iata)) {
          problems.push(`${label} airport code "${iata}" is not a known IATA code — re-check it`);
        }
      }
    }
    if (hasFullDate(flat.departure_time) && hasFullDate(flat.arrival_time)) {
      // An unparseable value yields an Invalid Date, for which `<` is false: no problem reported.
      if (new Date(flat.arrival_time as string) < new Date(flat.departure_time as string)) {
        problems.push('arrival_time is before departure_time — re-read the times');
      }
    }
  }

  if (t === 'hotel') {
    if (!hasFullDate(flat.checkin_time)) problems.push('checkin_time must be a full date');
    if (!hasFullDate(flat.checkout_time)) problems.push('checkout_time must be a full date');
    const ci = datePart(flat.checkin_time);
    const co = datePart(flat.checkout_time);
    // YYYY-MM-DD strings order chronologically under plain string comparison.
    if (ci && co && co < ci) problems.push('check-out date is before check-in — re-read both dates');
  }

  if (t === 'car') {
    if (!hasFullDate(flat.departure_time)) problems.push('the pickup date-time (departure_time) must be a full date');
    if (!hasFullDate(flat.arrival_time)) problems.push('the return date-time (arrival_time) must be a full date');
  }

  return problems;
}
|
||||
|
||||
/**
 * Whether a value is a string with at least one non-whitespace character.
 *
 * @param v - Any value; non-strings yield `false`.
 */
function str(v: unknown): boolean {
  return typeof v === 'string' && v.trim().length > 0;
}
|
||||
@@ -0,0 +1,75 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
|
||||
const { broadcastToUser } = vi.hoisted(() => ({ broadcastToUser: vi.fn() }));
|
||||
vi.mock('../../../../src/websocket', () => ({ broadcastToUser }));
|
||||
|
||||
import { ImportJobsService } from '../../../../src/nest/booking-import/import-jobs.service';
|
||||
|
||||
type Preview = ReturnType<typeof vi.fn>;
|
||||
/** Build an ImportJobsService whose only collaborator is a stub exposing `preview`. */
function makeService(preview: Preview) {
  return new ImportJobsService({ preview } as never);
}
|
||||
const files = (n: number) => Array.from({ length: n }, (_, i) => ({ originalname: `f${i}.pdf` })) as never;
|
||||
const eventsFor = (jobId: string) => broadcastToUser.mock.calls.map((c) => c[1]).filter((p) => p.jobId === jobId);
|
||||
|
||||
beforeEach(() => vi.clearAllMocks());
|
||||
|
||||
// Background import jobs: lifecycle (done / error), owner-scoped lookup and per-user serialization.
describe('ImportJobsService', () => {
  it('runs the parse off-request, reports progress and pushes the result on done', async () => {
    const preview = vi.fn(async (_f, _m, _u, onProgress: (d: number, t: number, name?: string) => void) => {
      onProgress(1, 2, 'f0.pdf');
      return { items: [{ id: 'x' }] };
    });
    const svc = makeService(preview);

    const id = svc.start('7', files(2), 'fallback-on-empty', 42);
    expect(typeof id).toBe('string');

    await vi.waitFor(() => expect(svc.get(id, 42)?.status).toBe('done'));
    const job = svc.get(id, 42)!;
    expect(job.result).toEqual({ items: [{ id: 'x' }] });
    expect(job.done).toBe(1);
    expect(preview).toHaveBeenCalledWith(expect.anything(), 'fallback-on-empty', 42, expect.any(Function));

    const types = eventsFor(id).map((p) => p.type);
    expect(types).toContain('import:progress');
    expect(types).toContain('import:done');
    expect(eventsFor(id).every((p) => p.tripId === '7')).toBe(true);
  });

  it('records an error and pushes import:error when the parse throws', async () => {
    const preview = vi.fn(async () => { throw new Error('parse boom'); });
    const svc = makeService(preview);

    const id = svc.start('1', files(1), 'no-ai', 9);
    await vi.waitFor(() => expect(svc.get(id, 9)?.status).toBe('error'));
    expect(svc.get(id, 9)!.error).toBe('parse boom');
    expect(eventsFor(id).map((p) => p.type)).toContain('import:error');
  });

  it('only returns a job to its owner', async () => {
    const svc = makeService(vi.fn(async () => ({ items: [] })));
    const id = svc.start('1', files(1), 'no-ai', 9);
    expect(svc.get(id, 9)).toBeDefined();
    expect(svc.get(id, 999)).toBeUndefined();
    expect(svc.get('does-not-exist', 9)).toBeUndefined();
  });

  it('chains a user\'s parses so they run one at a time', async () => {
    const order: string[] = [];
    const preview = vi.fn(async (f: { originalname: string }[]) => {
      order.push(`start:${f[0].originalname}`);
      await new Promise((r) => setTimeout(r, 5));
      order.push(`end:${f[0].originalname}`);
      return { items: [] };
    });
    const svc = makeService(preview);

    const a = svc.start('1', [{ originalname: 'A.pdf' }] as never, 'no-ai', 5);
    const b = svc.start('1', [{ originalname: 'B.pdf' }] as never, 'no-ai', 5);
    await vi.waitFor(() => expect(svc.get(b, 5)?.status).toBe('done'));
    expect(svc.get(a, 5)?.status).toBe('done');
    // B must not start before A finished — the per-user chain serializes them.
    expect(order).toEqual(['start:A.pdf', 'end:A.pdf', 'start:B.pdf', 'end:B.pdf']);
  });
});
|
||||
@@ -1,5 +1,28 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { extractBookingRef, extractTotalPrice, normCurrency } from '../../../../src/nest/llm-parse/router/extraction-router';
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
|
||||
// The router's single model call and the schema.org mapper are mocked: we drive the
|
||||
// enforced-extract output directly and inspect the flat reservations handed to the mapper,
|
||||
// so these tests cover the router's orchestration and deterministic post-processing without
|
||||
// a live Ollama or the real mapper.
|
||||
const { extractEnforced, mapToKi } = vi.hoisted(() => ({ extractEnforced: vi.fn(), mapToKi: vi.fn() }));
|
||||
vi.mock('../../../../src/nest/llm-parse/router/ollama-format.client', () => ({ extractEnforced }));
|
||||
vi.mock('../../../../src/nest/llm-parse/clients/nuextract', () => ({ nuExtractToKiReservations: mapToKi }));
|
||||
|
||||
import {
|
||||
extractBookingRef,
|
||||
extractTotalPrice,
|
||||
normCurrency,
|
||||
detectFlightNumbers,
|
||||
fixArrivalDate,
|
||||
routeExtraction,
|
||||
} from '../../../../src/nest/llm-parse/router/extraction-router';
|
||||
|
||||
const CTX = { baseUrl: 'http://ollama:11434/v1', model: 'qwen3:8b' };
|
||||
|
||||
// Reset call history before every test and give the mapper mock a recognisable default result.
beforeEach(() => {
  vi.clearAllMocks();
  mapToKi.mockReturnValue([{ '@type': 'Mock' }]);
});
|
||||
|
||||
describe('extractBookingRef', () => {
|
||||
it('reads an Airbnb "Bestätigungs-Code"', () => {
|
||||
@@ -41,6 +64,7 @@ describe('normCurrency', () => {
|
||||
expect(normCurrency('€')).toBe('EUR');
|
||||
expect(normCurrency('¥')).toBe('JPY');
|
||||
expect(normCurrency('$')).toBe('USD');
|
||||
expect(normCurrency('£')).toBe('GBP');
|
||||
expect(normCurrency('CHF')).toBe('CHF');
|
||||
});
|
||||
it('returns undefined for an unrecognised token', () => {
|
||||
@@ -48,3 +72,97 @@ describe('normCurrency', () => {
|
||||
expect(normCurrency('hello world')).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('detectFlightNumbers', () => {
  it('finds flight numbers order-preserving and deduped', () => {
    // "LH 400" and "LH400" are the same flight and must collapse into one entry.
    expect(detectFlightNumbers('Flug LH 400, dann LH400 und BA1234')).toEqual(['LH400', 'BA1234']);
  });
  it('returns [] when there is no flight-number pattern', () => {
    expect(detectFlightNumbers('A hotel booking with no flight codes')).toEqual([]);
  });
});
|
||||
|
||||
// fixArrivalDate completes a time-only arrival from the departure date.
describe('fixArrivalDate', () => {
  it('keeps the same day when arrival is later than departure', () => {
    const out = fixArrivalDate({ type: 'flight', departure_time: '2025-08-23T10:00', arrival_time: '13:00' });
    expect(out.arrival_time).toBe('2025-08-23T13:00:00');
  });
  it('rolls to the next day for an overnight leg', () => {
    const out = fixArrivalDate({ type: 'flight', departure_time: '2025-08-30T18:00', arrival_time: '07:00' });
    expect(out.arrival_time).toBe('2025-08-31T07:00:00');
  });
  it('leaves a non-transport reservation untouched', () => {
    const hotel = { type: 'hotel' as const, arrival_time: '07:00' };
    expect(fixArrivalDate(hotel).arrival_time).toBe('07:00');
  });
  it('leaves it untouched when departure or arrival is missing', () => {
    expect(fixArrivalDate({ type: 'flight' }).arrival_time).toBeUndefined();
  });
});
|
||||
|
||||
// Router orchestration: `extractEnforced` supplies the model output, and the flat
// reservations handed to the mocked mapper (`mapToKi`) are what gets inspected.
describe('routeExtraction', () => {
  it('extracts every flight leg in one call and normalizes/rolls arrival dates', async () => {
    extractEnforced.mockResolvedValue({
      flights: [
        { vehicle_number: 'LH400', from_code: 'FRA', to_code: 'JFK', departure_time: 'Aug 23 2025 10:00', arrival_time: '13:00' },
        { vehicle_number: 'LH401', from_code: 'JFK', to_code: 'FRA', departure_time: '2025-08-30T18:00', arrival_time: '07:00' },
      ],
    });
    const res = await routeExtraction('Flug LH 400 hin und zurück', CTX);
    expect(extractEnforced).toHaveBeenCalledTimes(1);
    expect(res.warnings).toEqual([]);
    expect(res.kiItems).toEqual([{ '@type': 'Mock' }]);
    const flats = mapToKi.mock.calls[0][0];
    expect(flats).toHaveLength(2);
    expect(flats[0].departure_time).toMatch(/^2025-08-23T\d{2}:\d{2}:00$/); // natural-language → ISO
    expect(flats[1].arrival_time).toBe('2025-08-31T07:00:00'); // overnight roll (TZ-safe: derived from the ISO departure date)
  });

  it('extracts a single reservation with the type-specific schema when keywords give the type away', async () => {
    extractEnforced.mockResolvedValue({ name: 'B&B Hotel', address: 'Str 1', checkin_time: '2025-05-01', checkout_time: '2025-05-02' });
    const res = await routeExtraction('Hotel booking — check-in 1 May', CTX);
    expect(res.warnings).toEqual([]);
    const flats = mapToKi.mock.calls[0][0];
    expect(flats).toHaveLength(1);
    expect(flats[0].type).toBe('hotel');
  });

  it('falls back to the union schema and the model-picked type for an unclear document', async () => {
    extractEnforced.mockResolvedValue({ type: 'event', name: 'Concert' });
    const res = await routeExtraction('A document with no obvious type keywords', CTX);
    const flats = mapToKi.mock.calls[0][0];
    expect(flats[0].type).toBe('event');
    expect(res.warnings).toEqual([]);
  });

  it('defaults the union type to hotel when the model omits it', async () => {
    extractEnforced.mockResolvedValue({});
    await routeExtraction('No keywords and no type field present', CTX);
    expect(mapToKi.mock.calls[0][0][0].type).toBe('hotel');
  });

  it('fills the booking reference and total price deterministically from the text', async () => {
    extractEnforced.mockResolvedValue({ name: 'B&B Hotel', checkin_time: '2025-05-01', checkout_time: '2025-05-02' });
    await routeExtraction('Hotel check-in\nBuchungsnummer: ABC123\nGesamtpreis 99,00 €', CTX);
    const flat = mapToKi.mock.calls[0][0][0];
    expect(flat.booking_reference).toBe('ABC123');
    expect(flat.price).toBe('99,00');
    expect(flat.currency).toBe('EUR');
  });

  it("lets the document's currency override the model but keeps a price the model already found", async () => {
    extractEnforced.mockResolvedValue({ name: 'B&B Hotel', checkin_time: '2025-05-01', checkout_time: '2025-05-02', price: '50', currency: 'USD' });
    await routeExtraction('Hotel check-in\nGesamtpreis 99,00 €', CTX);
    const flat = mapToKi.mock.calls[0][0][0];
    expect(flat.currency).toBe('EUR'); // document symbol wins over the model guess
    expect(flat.price).toBe('50'); // a non-empty model price is kept
  });

  it('returns a warning (and no items) when the model call throws', async () => {
    extractEnforced.mockRejectedValue(new Error('connection refused'));
    const res = await routeExtraction('Hotel check-in', CTX);
    expect(res.kiItems).toEqual([]);
    expect(res.warnings[0]).toContain('AI parsing failed');
    expect(res.warnings[0]).toContain('connection refused');
  });
});
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { createLlmClient } from '../../../../src/nest/llm-parse/llm-client.factory';
|
||||
import { OpenAiCompatibleClient } from '../../../../src/nest/llm-parse/clients/openai-compatible.client';
|
||||
import { AnthropicClient } from '../../../../src/nest/llm-parse/clients/anthropic.client';
|
||||
import type { ResolvedLlmConfig } from '../../../../src/services/llmConfig';
|
||||
|
||||
/** Minimal resolved config for a given provider; the cast lets tests pass providers outside the real union. */
const cfg = (provider: string): ResolvedLlmConfig =>
  ({ provider, model: 'm', baseUrl: 'http://x', multimodal: false } as unknown as ResolvedLlmConfig);
|
||||
|
||||
// Provider → client class selection, including the fallback for unknown providers.
describe('createLlmClient', () => {
  it('returns the Anthropic client for the anthropic provider', () => {
    expect(createLlmClient(cfg('anthropic'))).toBeInstanceOf(AnthropicClient);
  });

  it('returns the OpenAI-compatible client for openai and local', () => {
    expect(createLlmClient(cfg('openai'))).toBeInstanceOf(OpenAiCompatibleClient);
    expect(createLlmClient(cfg('local'))).toBeInstanceOf(OpenAiCompatibleClient);
  });

  it('falls back to the OpenAI-compatible client for an unknown provider', () => {
    expect(createLlmClient(cfg('something-else'))).toBeInstanceOf(OpenAiCompatibleClient);
  });
});
|
||||
@@ -15,6 +15,12 @@ vi.mock('../../../../src/nest/llm-parse/text-extract', async (orig) => {
|
||||
return { ...actual, extractText };
|
||||
});
|
||||
|
||||
// Created via vi.hoisted so the mocks already exist when the vi.mock factory that uses them runs.
const { routeExtraction, detectFlightNumbers } = vi.hoisted(() => ({
  routeExtraction: vi.fn(),
  detectFlightNumbers: vi.fn(() => [] as string[]),
}));
|
||||
vi.mock('../../../../src/nest/llm-parse/router/extraction-router', () => ({ routeExtraction, detectFlightNumbers }));
|
||||
|
||||
import { LlmParseService } from '../../../../src/nest/llm-parse/llm-parse.service';
|
||||
|
||||
const cfg = (over: Record<string, unknown> = {}) => ({ provider: 'openai', model: 'm', multimodal: false, ...over });
|
||||
@@ -26,6 +32,8 @@ beforeEach(() => {
|
||||
resolveLlmConfig.mockReturnValue(cfg());
|
||||
extract.mockResolvedValue([{ '@type': 'FlightReservation' }]);
|
||||
extractText.mockResolvedValue('Flight AB123');
|
||||
detectFlightNumbers.mockReturnValue([]);
|
||||
routeExtraction.mockResolvedValue({ kiItems: [{ '@type': 'LodgingReservation' }], warnings: [] });
|
||||
});
|
||||
|
||||
describe('LlmParseService', () => {
|
||||
@@ -113,4 +121,49 @@ describe('LlmParseService', () => {
|
||||
expect(res.kiItems).toEqual([]);
|
||||
expect(res.warnings[0]).toMatch(/AI parsing failed/i);
|
||||
});
|
||||
|
||||
it('routes the local provider through the extraction router instead of the single-shot client', async () => {
  resolveLlmConfig.mockReturnValue(cfg({ provider: 'local', baseUrl: 'http://ollama:11434/v1', apiKey: 'k' }));
  extractText.mockResolvedValue('Hotel booking');
  routeExtraction.mockResolvedValue({ kiItems: [{ '@type': 'LodgingReservation' }], warnings: ['note'] });
  const res = await svc().parse(file('a.txt'), 1);
  expect(res.kiItems).toEqual([{ '@type': 'LodgingReservation' }]);
  expect(res.warnings).toEqual(['note']);
  // The single-shot client must be bypassed entirely for the local provider.
  expect(extract).not.toHaveBeenCalled();
  expect(routeExtraction).toHaveBeenCalledWith('Hotel booking', { baseUrl: 'http://ollama:11434/v1', model: 'm', apiKey: 'k' });
});
|
||||
|
||||
it('keeps the wide text cap (16k) for a local flight itinerary but tightens it (6k) otherwise', async () => {
  const long = 'x'.repeat(7000);
  extractText.mockResolvedValue(long);

  // Pass 1: a flight number is detected, so the wide cap applies.
  resolveLlmConfig.mockReturnValue(cfg({ provider: 'local' }));
  detectFlightNumbers.mockReturnValue(['AB123']);
  await svc().parse(file('flights.txt'), 1);
  expect(routeExtraction.mock.calls[0][0]).toHaveLength(7000); // under the 16k cap, untouched

  // Pass 2: clear the recorded calls, then repeat with no flight numbers detected.
  vi.clearAllMocks();
  resolveLlmConfig.mockReturnValue(cfg({ provider: 'local' }));
  extractText.mockResolvedValue(long);
  detectFlightNumbers.mockReturnValue([]);
  routeExtraction.mockResolvedValue({ kiItems: [], warnings: [] });
  await svc().parse(file('hotel.txt'), 1);
  expect(routeExtraction.mock.calls[0][0]).toHaveLength(6000); // single booking → tighter cap
});
|
||||
|
||||
it('degrades to a warning when the local router throws', async () => {
  resolveLlmConfig.mockReturnValue(cfg({ provider: 'local' }));
  routeExtraction.mockRejectedValue(new Error('ollama down'));
  const res = await svc().parse(file('a.txt'), 1);
  // The rejection must surface as a warning, not as a thrown error.
  expect(res.kiItems).toEqual([]);
  expect(res.warnings[0]).toMatch(/AI parsing failed/i);
});
|
||||
|
||||
it('warns when the file cannot be read (text extraction throws)', async () => {
  extractText.mockRejectedValue(new Error('corrupt pdf'));
  const res = await svc().parse(file('a.pdf', '%PDF'), 1);
  expect(res.kiItems).toEqual([]);
  // The warning names the failure stage and carries the underlying error message.
  expect(res.warnings[0]).toMatch(/could not read file/i);
  expect(res.warnings[0]).toContain('corrupt pdf');
});
|
||||
});
|
||||
|
||||
@@ -166,3 +166,62 @@ describe('nuExtractToKiReservations', () => {
|
||||
expect(nuExtractToKiReservations(null)).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
// Flat → schema.org mapping for the reservation types, plus price/currency parsing edge cases.
describe('nuExtractToKiReservations — remaining reservation types', () => {
  /** Map a single flat reservation and return the first resulting node. */
  const one = (x: Record<string, unknown>) => nuExtractToKiReservations(x)[0];

  it('maps a train into a TrainReservation with stations', () => {
    const node = one({ type: 'train', vehicle_number: 'ICE 597', from_name: 'Berlin Hbf', to_name: 'München Hbf', departure_time: '2025-05-01T08:00:00' });
    expect(node['@type']).toBe('TrainReservation');
    expect(node.reservationFor).toMatchObject({ trainNumber: 'ICE 597', departureStation: { name: 'Berlin Hbf' }, arrivalStation: { name: 'München Hbf' } });
  });

  it('maps a bus into a BusReservation with stops', () => {
    const node = one({ type: 'bus', vehicle_number: 'FB42', from_name: 'Köln', to_name: 'Paris' });
    expect(node['@type']).toBe('BusReservation');
    expect(node.reservationFor).toMatchObject({ busNumber: 'FB42', departureBusStop: { name: 'Köln' }, arrivalBusStop: { name: 'Paris' } });
  });

  it('maps a ferry into a BoatReservation, using the operator when no name is given', () => {
    const node = one({ type: 'ferry', operator: 'Stena Line', from_name: 'Kiel', to_name: 'Göteborg' });
    expect(node['@type']).toBe('BoatReservation');
    expect((node.reservationFor as Record<string, unknown>).name).toBe('Stena Line');
  });

  it('maps a restaurant into a FoodEstablishmentReservation', () => {
    const node = one({ type: 'restaurant', name: 'Osteria', address: 'Via Roma 1', start_time: '2025-05-01T19:30:00' });
    expect(node['@type']).toBe('FoodEstablishmentReservation');
    expect(node.startTime).toBe('2025-05-01T19:30:00');
    expect((node.reservationFor as Record<string, unknown>).name).toBe('Osteria');
  });

  it('maps an event into an EventReservation with a location', () => {
    const node = one({ type: 'event', name: 'Concert', address: 'Arena', start_time: '2025-05-01T20:00:00', end_time: '2025-05-01T23:00:00' });
    expect(node['@type']).toBe('EventReservation');
    expect(node.startTime).toBe('2025-05-01T20:00:00');
    expect(node.reservationFor).toMatchObject({ name: 'Concert', location: { address: 'Arena' } });
  });

  it('uses the generic name fallback for a nameless restaurant/event with no address', () => {
    expect((one({ type: 'restaurant', start_time: '2025-05-01T19:30:00' }).reservationFor as Record<string, unknown>).name).toBe('Restaurant');
    expect((one({ type: 'event', start_time: '2025-05-01T20:00:00' }).reservationFor as Record<string, unknown>).name).toBe('Event');
  });

  it('resolves GBP, JPY and a bare ISO code, and leaves an unrecognised currency undefined', () => {
    expect(one({ type: 'hotel', name: 'A', price: '£120.00' }).priceCurrency).toBe('GBP');
    expect(one({ type: 'event', name: 'B', price: '¥9,400' }).priceCurrency).toBe('JPY');
    expect(one({ type: 'hotel', name: 'C', currency: 'CHF', price: '200' }).priceCurrency).toBe('CHF');
    expect(one({ type: 'hotel', name: 'D', price: '200' }).priceCurrency).toBeUndefined();
  });

  it('parses a plain number price, grouping without a decimal, and drops an unparseable amount', () => {
    expect(one({ type: 'hotel', name: 'A', price: 89 }).price).toBe(89);
    expect(one({ type: 'hotel', name: 'B', price: '1.580' }).price).toBe(1580); // dot is grouping, not a decimal
    expect(one({ type: 'hotel', name: 'C', price: 'free of charge' }).price).toBeUndefined();
  });

  it('accepts a bare array of reservations', () => {
    const out = nuExtractToKiReservations([{ type: 'hotel', name: 'A' }, { type: 'train', from_name: 'X', to_name: 'Y' }]);
    expect(out.map((n) => n['@type'])).toEqual(['LodgingReservation', 'TrainReservation']);
  });
});
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { toNativeBase, extractEnforced } from '../../../../src/nest/llm-parse/router/ollama-format.client';
|
||||
|
||||
/**
 * Replace the global `fetch` with a vitest mock backed by `impl`.
 *
 * @returns The mock, so tests can inspect the recorded calls.
 */
function mockFetch(impl: (url: string, init: RequestInit) => Promise<Response> | Response) {
  const fn = vi.fn(impl as unknown as typeof fetch);
  vi.stubGlobal('fetch', fn);
  return fn;
}
|
||||
|
||||
/**
 * Build a minimal Response-like stub exposing only `ok`, `status`, `json()` and `text()`.
 *
 * @param body - Value returned by `json()` and, JSON-stringified, by `text()`.
 */
function jsonResponse(body: unknown, ok = true, status = 200): Response {
  return { ok, status, json: async () => body, text: async () => JSON.stringify(body) } as unknown as Response;
}
|
||||
|
||||
// Baseline extractEnforced input; individual tests spread it and add optional fields.
const INPUT = {
  baseUrl: 'http://ollama:11434/v1',
  model: 'qwen3:8b',
  system: 'sys',
  user: 'doc',
  schema: { type: 'object' as const },
};
|
||||
|
||||
beforeEach(() => vi.unstubAllGlobals());
|
||||
|
||||
describe('toNativeBase', () => {
  it('strips a /v1 suffix and trailing slashes', () => {
    // All four spellings must normalise to the same bare origin.
    expect(toNativeBase('http://ollama:11434/v1')).toBe('http://ollama:11434');
    expect(toNativeBase('http://ollama:11434/v1/')).toBe('http://ollama:11434');
    expect(toNativeBase('http://ollama:11434/')).toBe('http://ollama:11434');
    expect(toNativeBase('http://ollama:11434')).toBe('http://ollama:11434');
  });
});
|
||||
|
||||
// Native Ollama format client: request shape, auth header, response parsing and error surfacing.
describe('extractEnforced', () => {
  it('posts to the native /api/chat with the grammar format and thinking disabled', async () => {
    const fetchFn = mockFetch(() => jsonResponse({ message: { content: '{"name":"Hotel"}' } }));
    const out = await extractEnforced(INPUT);
    expect(out).toEqual({ name: 'Hotel' });
    const [url, init] = fetchFn.mock.calls[0];
    expect(url).toBe('http://ollama:11434/api/chat');
    const body = JSON.parse((init as RequestInit).body as string);
    expect(body.format).toEqual({ type: 'object' });
    expect(body.think).toBe(false);
    expect(body.stream).toBe(false);
    expect(body.options.temperature).toBe(0);
    expect((init as RequestInit).headers).not.toHaveProperty('authorization');
  });

  it('sends a bearer header only when an apiKey is given', async () => {
    const fetchFn = mockFetch(() => jsonResponse({ message: { content: '{}' } }));
    await extractEnforced({ ...INPUT, apiKey: 'sk-123', numPredict: 900, numCtx: 16000 });
    const init = fetchFn.mock.calls[0][1] as RequestInit;
    expect((init.headers as Record<string, string>).authorization).toBe('Bearer sk-123');
    const body = JSON.parse(init.body as string);
    expect(body.options.num_predict).toBe(900);
    expect(body.options.num_ctx).toBe(16000);
  });

  it('strips a ```json code fence before parsing', async () => {
    mockFetch(() => jsonResponse({ message: { content: '```json\n{"a":1}\n```' } }));
    expect(await extractEnforced(INPUT)).toEqual({ a: 1 });
  });

  it('returns null when the content parses to a non-object', async () => {
    mockFetch(() => jsonResponse({ message: { content: '"just a string"' } }));
    expect(await extractEnforced(INPUT)).toBeNull();
  });

  it('returns null for unparseable content', async () => {
    mockFetch(() => jsonResponse({ message: { content: 'not json at all' } }));
    expect(await extractEnforced(INPUT)).toBeNull();
  });

  it('returns null when the response has no content', async () => {
    mockFetch(() => jsonResponse({ message: {} }));
    expect(await extractEnforced(INPUT)).toBeNull();
  });

  it('throws with the status when Ollama responds non-ok', async () => {
    mockFetch(() => jsonResponse({ error: 'model not found' }, false, 404));
    await expect(extractEnforced(INPUT)).rejects.toThrow(/Ollama \/api\/chat failed \(404\)/);
  });
});
|
||||
Reference in New Issue
Block a user