Files
TREK/server/tests/unit/nest/llm-parse/llm-parse.service.test.ts
T
Maurice 407bacf66e test(llm-parse): cover the extraction router, client factory and import jobs
The new LLM extraction router shipped with little branch coverage, dropping src/nest below the 80% gate. Add unit tests for routeExtraction (flights/single/union/error paths, deterministic booking-wide fill), the native Ollama format client, the provider factory, the local-router service path with its type-aware text cap, the flat->schema.org mapper's remaining reservation types, and the background import-jobs runner. Also remove the now-unused validate.ts (only its FlatLike type was still referenced; moved to flat-schemas).
2026-06-26 22:12:10 +02:00

170 lines
7.5 KiB
TypeScript

import { describe, it, expect, vi, beforeEach } from 'vitest';
// Module mocks for the service's collaborators. Every stub is created inside vi.hoisted()
// so that it already exists when the matching vi.mock() factory runs (vitest moves
// vi.mock() calls ahead of the file's imports).

// Config resolver stub; each test decides which config (or null) it returns.
const { resolveLlmConfig } = vi.hoisted(() => ({ resolveLlmConfig: vi.fn() }));
vi.mock('../../../../src/nest/llm-parse/llm-config.resolver', () => ({ resolveLlmConfig }));
// Client factory stub: every client it hands out exposes the same shared `extract` mock,
// which lets the tests inspect the input that was sent to the client.
const { createLlmClient, extract } = vi.hoisted(() => {
const extract = vi.fn();
return { createLlmClient: vi.fn(() => ({ extract })), extract };
});
vi.mock('../../../../src/nest/llm-parse/llm-client.factory', () => ({ createLlmClient }));
// Text extraction stub, resolving to 'Flight AB123' unless a test overrides it.
const { extractText } = vi.hoisted(() => ({ extractText: vi.fn(async () => 'Flight AB123') }));
// Partial mock: the module's real exports are kept and only `extractText` is replaced.
vi.mock('../../../../src/nest/llm-parse/text-extract', async (orig) => {
const actual = await orig() as Record<string, unknown>;
return { ...actual, extractText };
});
// Extraction router stubs; `detectFlightNumbers` reports no flight numbers by default.
const { routeExtraction, detectFlightNumbers } = vi.hoisted(() => ({
routeExtraction: vi.fn(),
detectFlightNumbers: vi.fn(() => [] as string[]),
}));
// Full mock: the router module exposes nothing but these two stubs.
vi.mock('../../../../src/nest/llm-parse/router/extraction-router', () => ({ routeExtraction, detectFlightNumbers }));
import { LlmParseService } from '../../../../src/nest/llm-parse/llm-parse.service';
/**
 * Builds a config fixture for the mocked resolver.
 *
 * @param over - Entries layered on top of the defaults; a key given here wins.
 * @returns A new object with `provider`, `model` and `multimodal` defaults plus `over`.
 */
const cfg = (over: Record<string, unknown> = {}) => {
  const defaults = { provider: 'openai', model: 'm', multimodal: false };
  return { ...defaults, ...over };
};
/** Returns a newly constructed service under test; every call yields a separate instance. */
const svc = (): LlmParseService => {
  return new LlmParseService();
};
/**
 * Builds an upload fixture in the shape handed to `parse` by these tests.
 *
 * @param name - Stored as `originalName`.
 * @param body - File content, converted to a Buffer; defaults to 'Flight AB123'.
 * @returns An object holding the content `buffer` and the `originalName`.
 */
const file = (name: string, body = 'Flight AB123') => {
  const buffer = Buffer.from(body);
  return { buffer, originalName: name };
};
// Shared setup: clear every mock, then reinstall the defaults the tests start from.
// A test that needs something different overrides the relevant stub itself.
beforeEach(() => {
  vi.clearAllMocks();

  const defaultConfig = cfg();
  const clientItems = [{ '@type': 'FlightReservation' }];
  const routerResult = { kiItems: [{ '@type': 'LodgingReservation' }], warnings: [] };

  // Collaborators feeding the service: resolved config and extracted text.
  resolveLlmConfig.mockReturnValue(defaultConfig);
  extractText.mockResolvedValue('Flight AB123');

  // Single-shot client result.
  extract.mockResolvedValue(clientItems);

  // Router stubs: no flight numbers detected, one lodging item routed.
  detectFlightNumbers.mockReturnValue([]);
  routeExtraction.mockResolvedValue(routerResult);
});
/**
 * Unit tests for LlmParseService with all collaborators mocked: the config resolver,
 * the client factory (shared `extract` stub), text extraction and the extraction router.
 * Each test starts from the defaults installed in `beforeEach` and overrides only what
 * its scenario needs.
 */
describe('LlmParseService', () => {
  it('isAvailable reflects whether a config resolves', () => {
    // Only the first lookup yields null; the second one falls back to the default config.
    resolveLlmConfig.mockReturnValueOnce(null);
    expect(svc().isAvailable(1)).toBe(false);
    expect(svc().isAvailable(1)).toBe(true);
  });

  it('returns a not-configured warning when no config resolves', async () => {
    resolveLlmConfig.mockReturnValue(null);
    const res = await svc().parse(file('a.txt'), 1);
    expect(res.kiItems).toEqual([]);
    expect(res.warnings[0]).toMatch(/not configured/i);
    expect(extract).not.toHaveBeenCalled();
  });

  it('sends extracted text for a text-like file', async () => {
    const res = await svc().parse(file('a.txt'), 1);
    expect(res.kiItems).toEqual([{ '@type': 'FlightReservation' }]);
    // First argument of the first client call is the extraction input.
    const input = extract.mock.calls[0][0];
    expect(input.text).toBe('Flight AB123');
    expect(input.file).toBeUndefined();
  });

  it('extracts text for a pdf on the OpenAI-compatible/local path (no native bytes)', async () => {
    extractText.mockResolvedValue('Hotel X');
    await svc().parse(file('a.pdf', '%PDF'), 1);
    const input = extract.mock.calls[0][0];
    expect(input.text).toBe('Hotel X');
    expect(input.file).toBeUndefined();
  });

  it('sends a pdf as native bytes only for Anthropic', async () => {
    resolveLlmConfig.mockReturnValue(cfg({ provider: 'anthropic' }));
    await svc().parse(file('a.pdf', '%PDF'), 1);
    const input = extract.mock.calls[0][0];
    expect(input.file).toEqual({ mimeType: 'application/pdf', data: expect.any(Buffer) });
    expect(input.text).toBeUndefined();
    // The raw file is forwarded, so text extraction must be skipped entirely.
    expect(extractText).not.toHaveBeenCalled();
  });

  it('warns when a pdf yields no readable text (e.g. a scan)', async () => {
    // Whitespace-only extraction output stands in for an image-only document.
    extractText.mockResolvedValue(' ');
    const res = await svc().parse(file('a.pdf', '%PDF'), 1);
    expect(res.kiItems).toEqual([]);
    expect(res.warnings[0]).toMatch(/no readable text/i);
    expect(extract).not.toHaveBeenCalled();
  });

  it('folds flattened type fields into reservationFor (small-model output)', async () => {
    // The client returns flight details at the root instead of nested under reservationFor.
    extract.mockResolvedValue([{
      '@type': 'FlightReservation',
      reservationNumber: 'ABC',
      flightNumber: 'EZY1357',
      airline: { iataCode: 'EG' },
      departureAirport: { iataCode: 'GEG' },
      arrivalAirport: { iataCode: 'AMS' },
      departureTime: '2026-06-11T10:00:00',
    }]);
    const res = await svc().parse(file('a.txt'), 1);
    const item = res.kiItems[0] as any;
    expect(item.reservationNumber).toBe('ABC');
    expect(item.reservationFor).toMatchObject({ flightNumber: 'EZY1357', departureAirport: { iataCode: 'GEG' } });
    // root-level keys are not duplicated into reservationFor
    expect(item.reservationFor.reservationNumber).toBeUndefined();
  });

  it('leaves already-nested reservationFor untouched', async () => {
    extract.mockResolvedValue([{ '@type': 'FlightReservation', reservationFor: { flightNumber: 'X1' } }]);
    const res = await svc().parse(file('a.txt'), 1);
    expect((res.kiItems[0] as any).reservationFor).toEqual({ flightNumber: 'X1' });
  });

  it('drops nodes without a string @type and warns', async () => {
    extract.mockResolvedValue([{ '@type': 'FlightReservation' }, { foo: 'bar' }]);
    const res = await svc().parse(file('a.txt'), 1);
    expect(res.kiItems).toEqual([{ '@type': 'FlightReservation' }]);
    expect(res.warnings.some(w => /unrecognized/i.test(w))).toBe(true);
  });

  it('degrades to a warning when the client throws', async () => {
    extract.mockRejectedValue(new Error('boom'));
    const res = await svc().parse(file('a.txt'), 1);
    expect(res.kiItems).toEqual([]);
    expect(res.warnings[0]).toMatch(/AI parsing failed/i);
  });

  it('routes the local provider through the extraction router instead of the single-shot client', async () => {
    resolveLlmConfig.mockReturnValue(cfg({ provider: 'local', baseUrl: 'http://ollama:11434/v1', apiKey: 'k' }));
    extractText.mockResolvedValue('Hotel booking');
    routeExtraction.mockResolvedValue({ kiItems: [{ '@type': 'LodgingReservation' }], warnings: ['note'] });
    const res = await svc().parse(file('a.txt'), 1);
    expect(res.kiItems).toEqual([{ '@type': 'LodgingReservation' }]);
    expect(res.warnings).toEqual(['note']);
    expect(extract).not.toHaveBeenCalled();
    expect(routeExtraction).toHaveBeenCalledWith('Hotel booking', { baseUrl: 'http://ollama:11434/v1', model: 'm', apiKey: 'k' });
  });

  // The two text-cap scenarios are separate tests so that each one starts from the
  // beforeEach defaults and a failure in one cannot hide the other.
  it('keeps the wide text cap (16k) for a local flight itinerary', async () => {
    extractText.mockResolvedValue('x'.repeat(7000));
    resolveLlmConfig.mockReturnValue(cfg({ provider: 'local' }));
    detectFlightNumbers.mockReturnValue(['AB123']);
    await svc().parse(file('flights.txt'), 1);
    expect(routeExtraction.mock.calls[0][0]).toHaveLength(7000); // under the 16k cap, untouched
  });

  it('tightens the text cap (6k) for a local document without flight numbers', async () => {
    extractText.mockResolvedValue('x'.repeat(7000));
    resolveLlmConfig.mockReturnValue(cfg({ provider: 'local' }));
    detectFlightNumbers.mockReturnValue([]);
    routeExtraction.mockResolvedValue({ kiItems: [], warnings: [] });
    await svc().parse(file('hotel.txt'), 1);
    expect(routeExtraction.mock.calls[0][0]).toHaveLength(6000); // single booking → tighter cap
  });

  it('degrades to a warning when the local router throws', async () => {
    resolveLlmConfig.mockReturnValue(cfg({ provider: 'local' }));
    routeExtraction.mockRejectedValue(new Error('ollama down'));
    const res = await svc().parse(file('a.txt'), 1);
    expect(res.kiItems).toEqual([]);
    expect(res.warnings[0]).toMatch(/AI parsing failed/i);
  });

  it('warns when the file cannot be read (text extraction throws)', async () => {
    extractText.mockRejectedValue(new Error('corrupt pdf'));
    const res = await svc().parse(file('a.pdf', '%PDF'), 1);
    expect(res.kiItems).toEqual([]);
    expect(res.warnings[0]).toMatch(/could not read file/i);
    // The underlying error message is surfaced to the caller.
    expect(res.warnings[0]).toContain('corrupt pdf');
  });
});