mirror of
https://github.com/mauriceboe/TREK.git
synced 2026-06-27 09:11:46 +00:00
156b8da37e
NuExtract isn't an instruct model — fed a plain chat prompt it just echoes the schema back. Detect a NuExtract model by id and talk to it the way the model cards document: the JSON template inlined in a single user message, no system prompt, no json_schema, temperature 0. Its flat result is mapped back to the same KiReservation shape the rest of the pipeline already uses, so nothing downstream changes; every other model keeps the generic prompt. Money is taken as a verbatim string and parsed locally (German "1.580,22 €" otherwise comes back as 1.49772), a rental car's pickup/return ride the from/to fields so a stray form label doesn't become the location, and a lodging with no name falls back to its address instead of being dropped.
144 lines
6.7 KiB
TypeScript
144 lines
6.7 KiB
TypeScript
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
import { OpenAiCompatibleClient } from '../../../../src/nest/llm-parse/clients/openai-compatible.client';
|
|
import { AnthropicClient } from '../../../../src/nest/llm-parse/clients/anthropic.client';
|
|
import type { LlmExtractionInput } from '../../../../src/nest/llm-parse/llm-provider.interface';
|
|
|
|
/**
 * Smallest extraction input the tests need. Individual cases spread it and
 * override `model`, `text`, `baseUrl` or `file` as required.
 */
const baseInput: LlmExtractionInput = {
  prompt: 'system',
  jsonSchema: { type: 'object' },
  model: 'm',
  text: 'Flight AB123',
};
|
|
|
|
/**
 * Installs a vitest mock as the global `fetch`.
 *
 * @param impl - Handler invoked for every `fetch(url, init)` call; may return
 *   a response directly or a promise of one.
 * @returns The mock function, so a test can inspect `mock.calls`.
 */
function mockFetch(impl: (url: string, init: RequestInit) => Promise<Response> | Response) {
  // The handler's narrowed signature is cast away so it can stand in for `fetch`.
  const fetchMock = vi.fn(impl as any);
  vi.stubGlobal('fetch', fetchMock);
  return fetchMock;
}
|
|
|
|
/**
 * Builds a minimal stand-in for a fetch `Response` that carries a JSON payload.
 *
 * Only `ok`, `status`, `json()` and `text()` are provided; the object is cast
 * to `Response`, so any other member is absent at runtime.
 *
 * @param body - Value resolved by `json()` and serialised by `text()`.
 * @param ok - Value of the `ok` flag (defaults to `true`).
 * @param status - Status code to report (defaults to `200`).
 * @returns The stub, typed as a `Response`.
 */
function jsonResponse(body: unknown, ok = true, status = 200): Response {
  const stub = {
    ok,
    status,
    json: async (): Promise<unknown> => body,
    // JSON.stringify returns undefined (not a string) for an undefined body;
    // fall back to '' so text() always resolves to a string.
    text: async (): Promise<string> => JSON.stringify(body) ?? '',
  };
  return stub as unknown as Response;
}
|
|
|
|
// Remove every global stub (the mocked `fetch` in particular) before each test runs.
beforeEach(() => {
  vi.unstubAllGlobals();
});
|
|
|
|
describe('OpenAiCompatibleClient', () => {
  /** Wraps `content` as the single assistant message of a chat-completions reply. */
  const completion = (content: string): Response => jsonResponse({ choices: [{ message: { content } }] });

  it('posts to {baseUrl}/chat/completions and returns the reservations array', async () => {
    const fetchSpy = mockFetch(() => completion(JSON.stringify({ reservations: [{ '@type': 'FlightReservation' }] })));

    const client = new OpenAiCompatibleClient();
    const reservations = await client.extract({ ...baseInput, baseUrl: 'http://localhost:11434/v1/' });

    expect(reservations).toEqual([{ '@type': 'FlightReservation' }]);
    // baseUrl was given with a trailing slash; the request URL must contain a single one.
    const [requestedUrl] = fetchSpy.mock.calls[0];
    expect(requestedUrl).toBe('http://localhost:11434/v1/chat/completions');
  });

  it('tolerates code-fenced JSON', async () => {
    const fenced = '```json\n{"reservations":[{"@type":"TrainReservation"}]}\n```';
    mockFetch(() => completion(fenced));

    const reservations = await new OpenAiCompatibleClient().extract(baseInput);

    expect(reservations).toEqual([{ '@type': 'TrainReservation' }]);
  });

  it('returns [] on malformed content', async () => {
    mockFetch(() => completion('not json'));

    const reservations = await new OpenAiCompatibleClient().extract(baseInput);

    expect(reservations).toEqual([]);
  });

  it('throws on non-2xx', async () => {
    mockFetch(() => jsonResponse({ error: 'bad' }, false, 401));

    const pending = new OpenAiCompatibleClient().extract(baseInput);

    await expect(pending).rejects.toThrow(/401/);
  });

  it('sends an image natively as image_url but never a file/pdf part', async () => {
    const fetchSpy = mockFetch(() => completion('{"reservations":[]}'));

    /** Content parts of the second message in the body of the `callIndex`-th request. */
    const secondMessageParts = (callIndex: number) => {
      const init = fetchSpy.mock.calls[callIndex][1] as RequestInit;
      return JSON.parse(init.body as string).messages[1].content;
    };

    const image = { mimeType: 'image/png', data: Buffer.from('IMG') };
    await new OpenAiCompatibleClient().extract({ ...baseInput, file: image });
    const imageParts = secondMessageParts(0);
    expect(imageParts.some((part: any) => part.type === 'image_url')).toBe(true);
    expect(imageParts.some((part: any) => part.type === 'file')).toBe(false);

    // A PDF must NOT be sent as a content part (Ollama rejects it).
    const pdf = { mimeType: 'application/pdf', data: Buffer.from('PDF') };
    await new OpenAiCompatibleClient().extract({ ...baseInput, file: pdf });
    const pdfParts = secondMessageParts(1);
    expect(pdfParts.every((part: any) => part.type !== 'file' && part.type !== 'image_url')).toBe(true);
  });
});
|
|
|
|
describe('OpenAiCompatibleClient — NuExtract path', () => {
  /** Parsed JSON body of the first request the mocked fetch received. */
  const firstRequestBody = (fetchSpy: ReturnType<typeof mockFetch>) => {
    const init = fetchSpy.mock.calls[0][1] as RequestInit;
    return JSON.parse(init.body as string);
  };

  it('inlines the template in one user message (no system, no response_format) and maps the flat result', async () => {
    // Flat, snake_case record as returned by the model in this scenario.
    const flatHotel = {
      type: 'hotel',
      name: 'B&B Hotel',
      booking_reference: '733',
      checkin_time: '2026-05-01T15:00:00',
      checkout_time: '2026-05-02T12:00:00',
    };
    const fetchSpy = mockFetch(() =>
      jsonResponse({ choices: [{ message: { content: JSON.stringify({ reservations: [flatHotel] }) } }] }),
    );

    const mapped = await new OpenAiCompatibleClient().extract({
      ...baseInput,
      model: 'hf.co/numind/NuExtract-2.0-2B-GGUF:latest',
      text: 'Hotel doc',
    });

    // The flat record must come back in the reservation shape asserted below.
    const expectedLodging = {
      '@type': 'LodgingReservation',
      reservationNumber: '733',
      reservationFor: { name: 'B&B Hotel' },
      checkinTime: '2026-05-01T15:00:00',
      checkoutTime: '2026-05-02T12:00:00',
    };
    expect(mapped).toEqual([expectedLodging]);

    const sent = firstRequestBody(fetchSpy);
    expect(sent.messages).toHaveLength(1);
    expect(sent.messages[0].role).toBe('user');
    // The single user message starts with the template and ends with the document text.
    expect(sent.messages[0].content[0].text.startsWith('# Template:')).toBe(true);
    expect(sent.messages[0].content[0].text.endsWith('Hotel doc')).toBe(true);
    expect(sent.temperature).toBe(0);
    expect(sent.response_format).toBeUndefined();
  });

  it('keeps the system prompt and response_format for non-NuExtract models', async () => {
    const fetchSpy = mockFetch(() => jsonResponse({ choices: [{ message: { content: '{"reservations":[]}' } }] }));

    await new OpenAiCompatibleClient().extract({ ...baseInput, model: 'qwen2.5:7b' });

    const sent = firstRequestBody(fetchSpy);
    expect(sent.messages[0].role).toBe('system');
    expect(sent.response_format).toBeDefined();
  });
});
|
|
|
|
describe('AnthropicClient', () => {
  /** A `tool_use` content block for the `emit_reservations` tool carrying `reservations`. */
  const toolUse = (reservations: unknown[]) => ({ type: 'tool_use', name: 'emit_reservations', input: { reservations } });

  /** Parsed JSON body of the first request the mocked fetch received. */
  const firstRequestBody = (fetchSpy: ReturnType<typeof mockFetch>) => {
    const init = fetchSpy.mock.calls[0][1] as RequestInit;
    return JSON.parse(init.body as string);
  };

  it('forces the emit_reservations tool and reads its input', async () => {
    const fetchSpy = mockFetch(() =>
      jsonResponse({ stop_reason: 'tool_use', content: [toolUse([{ '@type': 'LodgingReservation' }])] }),
    );

    const reservations = await new AnthropicClient().extract(baseInput);

    expect(reservations).toEqual([{ '@type': 'LodgingReservation' }]);
    const sent = firstRequestBody(fetchSpy);
    expect(sent.tool_choice).toEqual({ type: 'tool', name: 'emit_reservations' });
    expect(sent.tools[0].name).toBe('emit_reservations');
  });

  it('throws on a refusal stop_reason', async () => {
    mockFetch(() => jsonResponse({ stop_reason: 'refusal', content: [] }));

    const pending = new AnthropicClient().extract(baseInput);

    await expect(pending).rejects.toThrow(/declined/i);
  });

  it('throws on non-2xx', async () => {
    mockFetch(() => jsonResponse({ error: 'bad' }, false, 500));

    const pending = new AnthropicClient().extract(baseInput);

    await expect(pending).rejects.toThrow(/500/);
  });

  it('sends a native pdf as a base64 document block', async () => {
    const fetchSpy = mockFetch(() => jsonResponse({ content: [toolUse([])] }));
    const pdf = { mimeType: 'application/pdf', data: Buffer.from('PDF') };

    await new AnthropicClient().extract({ ...baseInput, file: pdf });

    const blocks = firstRequestBody(fetchSpy).messages[0].content;
    const hasBase64Document = blocks.some((block: any) => block.type === 'document' && block.source.type === 'base64');
    expect(hasBase64Document).toBe(true);
  });
});
|