mirror of
https://github.com/mauriceboe/TREK.git
synced 2026-06-30 18:46:00 +00:00
feat(extract): extract data using LLM
This commit is contained in:
@@ -0,0 +1,61 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { HttpException } from '@nestjs/common';
|
||||
import { BookingImportController } from '../../../../src/nest/booking-import/booking-import.controller';
|
||||
import type { BookingImportService } from '../../../../src/nest/booking-import/booking-import.service';
|
||||
import type { User } from '../../../../src/types';
|
||||
|
||||
// Shared fixtures for the controller tests.
// `user` is the caller passed to every controller invocation (id 1, role 'user').
const user = { id: 1, role: 'user' } as User;

/** Builds a minimal uploaded-file stub carrying only `originalname` and a one-byte buffer. */
const file = (name = 'a.pdf') => ({ originalname: name, buffer: Buffer.from('x') } as Express.Multer.File);
|
||||
|
||||
/**
 * Builds a BookingImportController wired to a stubbed service.
 *
 * Each stub defaults to a permissive answer (access granted, both parsers
 * available, empty preview result); entries in `over` replace those defaults.
 *
 * @param over - Service members to override for a single test.
 * @returns The controller under test (`c`) and the stub service (`svc`).
 */
function make(over: Partial<BookingImportService> = {}) {
  const svc = {
    verifyTripAccess: vi.fn(() => ({ user_id: 1 })),
    canEdit: vi.fn(() => true),
    isAvailable: vi.fn(() => true),
    aiAvailable: vi.fn(() => true),
    preview: vi.fn(async () => ({ items: [], warnings: [], files: [] })),
    ...over,
  } as unknown as BookingImportService;

  return { c: new BookingImportController(svc), svc };
}
|
||||
|
||||
/**
 * Runs `fn`, expecting it to reject with an HttpException.
 *
 * @param fn - The call under test.
 * @returns The HTTP status carried by the thrown HttpException.
 * @throws Error('expected throw') when `fn` resolves instead of rejecting.
 */
async function status(fn: () => Promise<unknown>): Promise<number> {
  try {
    await fn();
  } catch (e) {
    expect(e).toBeInstanceOf(HttpException);
    return (e as HttpException).getStatus();
  }
  // Kept outside the try so the sentinel cannot be swallowed by the catch above.
  throw new Error('expected throw');
}
|
||||
|
||||
// Clear recorded calls on every mock so assertions only see the current test.
beforeEach(() => {
  vi.clearAllMocks();
});
|
||||
|
||||
// Covers the controller's request validation (mode, parser availability, file
// presence) and the arguments it forwards to BookingImportService.preview.
describe('BookingImportController.preview', () => {
  it('rejects an invalid mode with 400', async () => {
    const { c } = make();
    expect(await status(() => c.preview(user, 't1', [file()], 'bogus'))).toBe(400);
  });

  it('returns 409 for force-ai when AI is not configured', async () => {
    const { c } = make({ aiAvailable: vi.fn(() => false) as any });
    expect(await status(() => c.preview(user, 't1', [file()], 'force-ai'))).toBe(409);
  });

  it('returns 503 for no-ai when the extractor is unavailable', async () => {
    const { c } = make({ isAvailable: vi.fn(() => false) as any });
    expect(await status(() => c.preview(user, 't1', [file()], 'no-ai'))).toBe(503);
  });

  it('returns 400 when no files are uploaded', async () => {
    const { c } = make();
    expect(await status(() => c.preview(user, 't1', [], 'no-ai'))).toBe(400);
  });

  it('passes the parsed mode and user id through to the service', async () => {
    const { c, svc } = make();
    await c.preview(user, 't1', [file()], 'fallback-on-empty');
    // One file, the requested mode, and the id of the `user` fixture.
    expect(svc.preview).toHaveBeenCalledWith([expect.anything()], 'fallback-on-empty', 1);
  });

  it('defaults the mode to no-ai when omitted', async () => {
    const { c, svc } = make();
    await c.preview(user, 't1', [file()], undefined);
    expect(svc.preview).toHaveBeenCalledWith([expect.anything()], 'no-ai', 1);
  });
});
|
||||
@@ -0,0 +1,79 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { HttpException } from '@nestjs/common';
|
||||
|
||||
// Mock the heavy side-effect imports so the service module loads cleanly; the
// preview() path under test only touches the extractor + llmParse deps.
vi.mock('../../../../src/db/database', () => ({ db: { prepare: vi.fn() }, closeDb: () => {}, reinitialize: () => {} }));
vi.mock('../../../../src/websocket', () => ({ broadcast: vi.fn() }));
vi.mock('../../../../src/services/permissions', () => ({ checkPermission: vi.fn(() => true) }));
vi.mock('../../../../src/services/tripAccess', () => ({ verifyTripAccess: vi.fn() }));
vi.mock('../../../../src/services/reservationService', () => ({ createReservation: vi.fn() }));
vi.mock('../../../../src/services/placeService', () => ({ createPlace: vi.fn() }));
vi.mock('../../../../src/services/mapsService', () => ({ searchNominatim: vi.fn() }));
|
||||
|
||||
import { BookingImportService } from '../../../../src/nest/booking-import/booking-import.service';
|
||||
|
||||
// A single LodgingReservation node used as the canned parser output in these tests.
const HOTEL_KI = {
  '@type': 'LodgingReservation',
  reservationNumber: 'ABC',
  reservationFor: { name: 'Hotel X' },
  checkinTime: '2026-06-11T15:00',
  checkoutTime: '2026-06-12T11:00',
};

/** Builds a minimal upload stub: a one-byte buffer plus `originalname`. */
const file = (name = 'a.pdf') => ({ buffer: Buffer.from('x'), originalname: name } as any);
|
||||
|
||||
/**
 * Builds a BookingImportService backed by stub extractor and LLM-parse deps.
 *
 * @param opts.kit - Value returned by the extractor's `isAvailable` (default false).
 * @param opts.ai - Value returned by the LLM parser's `isAvailable` (default false).
 * @param opts.extract - Implementation for `extractor.extract` (default resolves to `[]`).
 * @param opts.parse - Implementation for `llmParse.parse` (default resolves to no items, no warnings).
 * @returns The service under test plus both stubs so tests can inspect their calls.
 */
function make(opts: { kit?: boolean; ai?: boolean; extract?: any; parse?: any }) {
  const extractor = {
    isAvailable: () => opts.kit ?? false,
    extract: vi.fn(opts.extract ?? (async () => [])),
  };
  const llmParse = {
    isAvailable: () => opts.ai ?? false,
    parse: vi.fn(opts.parse ?? (async () => ({ kiItems: [], warnings: [] }))),
  };

  return { svc: new BookingImportService(extractor as any, llmParse as any), extractor, llmParse };
}
|
||||
|
||||
// Clear recorded calls on every mock so assertions only see the current test.
beforeEach(() => {
  vi.clearAllMocks();
});
|
||||
|
||||
// Exercises preview() across the three modes ('no-ai', 'fallback-on-empty',
// 'force-ai'), checking which parser runs and how the result is flagged.
describe('BookingImportService.preview', () => {
  it('no-ai: maps kitinerary items, does not force needs_review, reports aiUsed:false', async () => {
    const { svc, llmParse } = make({ kit: true, ai: false, extract: async () => [HOTEL_KI] });
    const res = await svc.preview([file()], 'no-ai', 1);
    expect(res.items).toHaveLength(1);
    expect(res.items[0].needs_review).toBeFalsy();
    expect(res.files).toEqual([{ fileName: 'a.pdf', aiAvailable: false, aiUsed: false }]);
    expect(llmParse.parse).not.toHaveBeenCalled();
  });

  it('throws 503 when neither parser is available', async () => {
    const { svc } = make({ kit: false, ai: false });
    // Capture the error first and assert afterwards: a sentinel thrown inside
    // the try would be caught by the same catch and misreported as a type mismatch.
    let err: unknown;
    try {
      await svc.preview([file()], 'no-ai', 1);
    } catch (e) {
      err = e;
    }
    // `err` stays undefined when nothing was thrown, which fails this assertion.
    expect(err).toBeInstanceOf(HttpException);
    expect((err as HttpException).getStatus()).toBe(503);
  });

  it('fallback-on-empty: runs the LLM when kitinerary finds nothing and flags needs_review', async () => {
    const { svc, extractor, llmParse } = make({
      kit: true,
      ai: true,
      extract: async () => [],
      parse: async () => ({ kiItems: [HOTEL_KI], warnings: [] }),
    });
    const res = await svc.preview([file()], 'fallback-on-empty', 1);
    expect(extractor.extract).toHaveBeenCalled();
    expect(llmParse.parse).toHaveBeenCalled();
    expect(res.items).toHaveLength(1);
    expect(res.items[0].needs_review).toBe(true);
    expect(res.files![0]).toEqual({ fileName: 'a.pdf', aiAvailable: true, aiUsed: true });
  });

  it('fallback-on-empty: skips the LLM when kitinerary already found items', async () => {
    const { svc, llmParse } = make({ kit: true, ai: true, extract: async () => [HOTEL_KI] });
    const res = await svc.preview([file()], 'fallback-on-empty', 1);
    expect(llmParse.parse).not.toHaveBeenCalled();
    expect(res.files![0].aiUsed).toBe(false);
  });

  it('force-ai: skips kitinerary entirely and uses the LLM', async () => {
    const { svc, extractor, llmParse } = make({
      kit: true,
      ai: true,
      parse: async () => ({ kiItems: [HOTEL_KI], warnings: [] }),
    });
    const res = await svc.preview([file()], 'force-ai', 1);
    expect(extractor.extract).not.toHaveBeenCalled();
    expect(llmParse.parse).toHaveBeenCalled();
    expect(res.items[0].needs_review).toBe(true);
  });
});
|
||||
@@ -0,0 +1,96 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { OpenAiCompatibleClient } from '../../../../src/nest/llm-parse/clients/openai-compatible.client';
|
||||
import { AnthropicClient } from '../../../../src/nest/llm-parse/clients/anthropic.client';
|
||||
import type { LlmExtractionInput } from '../../../../src/nest/llm-parse/llm-provider.interface';
|
||||
|
||||
// Minimal text-only extraction input; individual tests spread extra fields
// (baseUrl, file) on top of it.
const baseInput: LlmExtractionInput = {
  prompt: 'system',
  jsonSchema: { type: 'object' },
  model: 'm',
  text: 'Flight AB123',
};
|
||||
|
||||
/**
 * Replaces the global `fetch` with a mock that delegates to `impl`.
 *
 * @param impl - Fake fetch implementation receiving the URL and request init.
 * @returns The mock, so tests can inspect the recorded calls.
 */
function mockFetch(impl: (url: string, init: RequestInit) => Promise<Response> | Response) {
  const fn = vi.fn(impl as any);
  vi.stubGlobal('fetch', fn);
  return fn;
}
|
||||
|
||||
/**
 * Builds a minimal Response-like stub exposing only `ok`, `status`, `json()` and `text()`.
 *
 * @param body - Value returned by `json()` and, JSON-stringified, by `text()`.
 * @param ok - Value of the `ok` flag (default true).
 * @param status - HTTP status code (default 200).
 * @returns The stub, cast to `Response`; no other Response members are implemented.
 */
function jsonResponse(body: unknown, ok = true, status = 200): Response {
  return {
    ok,
    status,
    json: async () => body,
    text: async () => JSON.stringify(body),
  } as unknown as Response;
}
|
||||
|
||||
// Drop any `fetch` stub installed by a previous test.
beforeEach(() => {
  vi.unstubAllGlobals();
});
|
||||
|
||||
// Exercises OpenAiCompatibleClient.extract against a stubbed global fetch:
// request URL, response parsing, error handling and multimodal content parts.
describe('OpenAiCompatibleClient', () => {
  it('posts to {baseUrl}/chat/completions and returns the reservations array', async () => {
    const fetchFn = mockFetch(() =>
      jsonResponse({ choices: [{ message: { content: JSON.stringify({ reservations: [{ '@type': 'FlightReservation' }] }) } }] }),
    );
    // The trailing slash on baseUrl must not yield a double slash in the request URL.
    const out = await new OpenAiCompatibleClient().extract({ ...baseInput, baseUrl: 'http://localhost:11434/v1/' });
    expect(out).toEqual([{ '@type': 'FlightReservation' }]);
    expect(fetchFn.mock.calls[0][0]).toBe('http://localhost:11434/v1/chat/completions');
  });

  it('tolerates code-fenced JSON', async () => {
    mockFetch(() =>
      jsonResponse({ choices: [{ message: { content: '```json\n{"reservations":[{"@type":"TrainReservation"}]}\n```' } }] }),
    );
    const out = await new OpenAiCompatibleClient().extract(baseInput);
    expect(out).toEqual([{ '@type': 'TrainReservation' }]);
  });

  it('returns [] on malformed content', async () => {
    mockFetch(() => jsonResponse({ choices: [{ message: { content: 'not json' } }] }));
    expect(await new OpenAiCompatibleClient().extract(baseInput)).toEqual([]);
  });

  it('throws on non-2xx', async () => {
    mockFetch(() => jsonResponse({ error: 'bad' }, false, 401));
    await expect(new OpenAiCompatibleClient().extract(baseInput)).rejects.toThrow(/401/);
  });

  it('sends an image natively as image_url but never a file/pdf part', async () => {
    const fetchFn = mockFetch(() => jsonResponse({ choices: [{ message: { content: '{"reservations":[]}' } }] }));

    await new OpenAiCompatibleClient().extract({ ...baseInput, file: { mimeType: 'image/png', data: Buffer.from('IMG') } });
    // Content parts of the second message in the first request body.
    let parts = JSON.parse((fetchFn.mock.calls[0][1] as RequestInit).body as string).messages[1].content;
    expect(parts.some((p: any) => p.type === 'image_url')).toBe(true);
    expect(parts.some((p: any) => p.type === 'file')).toBe(false);

    // A PDF must NOT be sent as a content part (Ollama rejects it).
    await new OpenAiCompatibleClient().extract({ ...baseInput, file: { mimeType: 'application/pdf', data: Buffer.from('PDF') } });
    parts = JSON.parse((fetchFn.mock.calls[1][1] as RequestInit).body as string).messages[1].content;
    expect(parts.every((p: any) => p.type !== 'file' && p.type !== 'image_url')).toBe(true);
  });
});
|
||||
|
||||
// Exercises AnthropicClient.extract against a stubbed global fetch: forced tool
// use, refusal handling, HTTP errors and native PDF document blocks.
describe('AnthropicClient', () => {
  it('forces the emit_reservations tool and reads its input', async () => {
    const fetchFn = mockFetch(() =>
      jsonResponse({
        stop_reason: 'tool_use',
        content: [{ type: 'tool_use', name: 'emit_reservations', input: { reservations: [{ '@type': 'LodgingReservation' }] } }],
      }),
    );
    const out = await new AnthropicClient().extract(baseInput);
    expect(out).toEqual([{ '@type': 'LodgingReservation' }]);

    const body = JSON.parse((fetchFn.mock.calls[0][1] as RequestInit).body as string);
    expect(body.tool_choice).toEqual({ type: 'tool', name: 'emit_reservations' });
    expect(body.tools[0].name).toBe('emit_reservations');
  });

  it('throws on a refusal stop_reason', async () => {
    mockFetch(() => jsonResponse({ stop_reason: 'refusal', content: [] }));
    await expect(new AnthropicClient().extract(baseInput)).rejects.toThrow(/declined/i);
  });

  it('throws on non-2xx', async () => {
    mockFetch(() => jsonResponse({ error: 'bad' }, false, 500));
    await expect(new AnthropicClient().extract(baseInput)).rejects.toThrow(/500/);
  });

  it('sends a native pdf as a base64 document block', async () => {
    const fetchFn = mockFetch(() =>
      jsonResponse({ content: [{ type: 'tool_use', name: 'emit_reservations', input: { reservations: [] } }] }),
    );
    await new AnthropicClient().extract({ ...baseInput, file: { mimeType: 'application/pdf', data: Buffer.from('PDF') } });

    const body = JSON.parse((fetchFn.mock.calls[0][1] as RequestInit).body as string);
    // Content blocks of the first message in the request body.
    const blocks = body.messages[0].content;
    expect(blocks.some((b: any) => b.type === 'document' && b.source.type === 'base64')).toBe(true);
  });
});
|
||||
@@ -0,0 +1,67 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
|
||||
// Mocks are created through vi.hoisted so they already exist when the
// vi.mock factories below run.

// Database stub: `prepare()` always returns the same statement object, exposed
// as `_stmt` so tests can program its `get()` result.
const { dbMock } = vi.hoisted(() => {
  const stmt = { get: vi.fn() };
  return { dbMock: { prepare: vi.fn(() => stmt), _stmt: stmt } };
});
vi.mock('../../../../src/db/database', () => ({ db: dbMock, closeDb: () => {}, reinitialize: () => {} }));

// Addon toggle stub.
const { isAddonEnabled } = vi.hoisted(() => ({ isAddonEnabled: vi.fn() }));
vi.mock('../../../../src/services/adminService', () => ({ isAddonEnabled }));

// Per-user settings stubs: empty settings and no decrypted value by default.
const { getUserSettings, getDecryptedUserSetting } = vi.hoisted(() => ({
  getUserSettings: vi.fn(() => ({}) as Record<string, unknown>),
  getDecryptedUserSetting: vi.fn(() => null as string | null),
}));
vi.mock('../../../../src/services/settingsService', () => ({ getUserSettings, getDecryptedUserSetting }));
|
||||
|
||||
import { resolveLlmConfig } from '../../../../src/nest/llm-parse/llm-config.resolver';
|
||||
|
||||
/**
 * Programs the row returned by the mocked statement's `get()`.
 *
 * @param config - Instance config to serialise into the row's `config` column
 *   as JSON; pass `undefined` to make `get()` return `undefined` (no row).
 */
function setInstanceConfig(config: unknown) {
  const row = config === undefined ? undefined : { config: JSON.stringify(config) };
  dbMock._stmt.get.mockReturnValue(row);
}
|
||||
|
||||
// Baseline for every test: addon enabled, no instance config row, empty user
// settings and no decrypted user key. Tests override what they need.
beforeEach(() => {
  vi.clearAllMocks();
  isAddonEnabled.mockReturnValue(true);
  setInstanceConfig(undefined);
  getUserSettings.mockReturnValue({});
  getDecryptedUserSetting.mockReturnValue(null);
});
|
||||
|
||||
// Checks how resolveLlmConfig picks between the instance config, the per-user
// settings, and no config at all.
describe('resolveLlmConfig', () => {
  it('returns null when the addon is disabled', () => {
    isAddonEnabled.mockReturnValue(false);
    expect(resolveLlmConfig(1)).toBeNull();
  });

  it('uses instance config when present (and decrypts the key)', () => {
    setInstanceConfig({ provider: 'anthropic', model: 'claude-opus-4-8', apiKey: 'sk-plain', multimodal: true });
    expect(resolveLlmConfig(1)).toEqual({
      provider: 'anthropic',
      model: 'claude-opus-4-8',
      baseUrl: undefined,
      apiKey: 'sk-plain',
      multimodal: true,
    });
  });

  it('falls back to per-user config when instance config is incomplete', () => {
    setInstanceConfig({ provider: 'anthropic' }); // no model → not usable
    getUserSettings.mockReturnValue({ llm_provider: 'local', llm_model: 'nuextract', llm_base_url: 'http://x/v1', llm_multimodal: true });
    getDecryptedUserSetting.mockReturnValue('user-key');
    expect(resolveLlmConfig(7)).toEqual({
      provider: 'local',
      model: 'nuextract',
      baseUrl: 'http://x/v1',
      apiKey: 'user-key',
      multimodal: true,
    });
    expect(getDecryptedUserSetting).toHaveBeenCalledWith(7, 'llm_api_key');
  });

  it('returns null when neither instance nor user config is usable', () => {
    getUserSettings.mockReturnValue({ llm_provider: 'openai' }); // no model
    expect(resolveLlmConfig(1)).toBeNull();
  });
});
|
||||
@@ -0,0 +1,60 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { HttpException } from '@nestjs/common';
|
||||
import { LlmLocalService } from '../../../../src/nest/llm-parse/llm-local.service';
|
||||
|
||||
/** Creates a fresh LlmLocalService for each call. */
const svc = () => {
  return new LlmLocalService();
};
|
||||
|
||||
/**
 * Replaces the global `fetch` with a mock that delegates to `impl`.
 *
 * @param impl - Fake fetch implementation.
 * @returns The mock, so tests can inspect the recorded calls.
 */
function mockFetch(impl: any) {
  const fn = vi.fn(impl);
  vi.stubGlobal('fetch', fn);
  return fn;
}
|
||||
|
||||
// Drop any `fetch` stub installed by a previous test.
beforeEach(() => {
  vi.unstubAllGlobals();
});
|
||||
|
||||
// Checks how ollamaRoot normalises a configured base URL and what it rejects.
describe('LlmLocalService.ollamaRoot', () => {
  it('strips a trailing /v1 and slashes', () => {
    expect(svc().ollamaRoot('http://localhost:11434/v1')).toBe('http://localhost:11434');
    expect(svc().ollamaRoot('http://localhost:11434/v1/')).toBe('http://localhost:11434');
    expect(svc().ollamaRoot('http://host:1/')).toBe('http://host:1');
  });

  it('defaults when no base URL is given', () => {
    expect(svc().ollamaRoot(undefined)).toBe('http://localhost:11434');
  });

  it('rejects non-http(s) and invalid URLs', () => {
    expect(() => svc().ollamaRoot('ftp://x')).toThrow(HttpException);
    expect(() => svc().ollamaRoot('not a url')).toThrow(HttpException);
  });
});
|
||||
|
||||
// Checks listModels against a stubbed fetch: request URL, filtering of the
// returned model list, and the failure path.
describe('LlmLocalService.listModels', () => {
  it('returns named models from /api/tags', async () => {
    // The second entry has an empty name and is expected to be dropped.
    const fetchFn = mockFetch(async () => ({
      ok: true,
      json: async () => ({ models: [{ name: 'nuextract', size: 100 }, { name: '' }] }),
    }));
    const out = await svc().listModels('http://localhost:11434/v1');
    expect(out.models).toEqual([{ name: 'nuextract', size: 100 }]);
    expect(fetchFn.mock.calls[0][0]).toBe('http://localhost:11434/api/tags');
  });

  it('502s when the server is unreachable', async () => {
    mockFetch(async () => { throw new Error('ECONNREFUSED'); });
    // NOTE(review): the title says 502, but only the exception type is asserted
    // here. Consider also checking getStatus() once confirmed against the service.
    await expect(svc().listModels('http://localhost:11434')).rejects.toThrow(HttpException);
  });
});
|
||||
|
||||
// Checks pull's argument validation and the request it sends to /api/pull.
describe('LlmLocalService.pull', () => {
  it('requires a model', async () => {
    await expect(svc().pull('http://localhost:11434', '')).rejects.toThrow(HttpException);
  });

  it('posts to /api/pull and returns the stream body', async () => {
    // Opaque placeholder: the test only checks that this same object is returned.
    const body = {} as ReadableStream<Uint8Array>;
    const fetchFn = mockFetch(async () => ({ ok: true, body }));
    const out = await svc().pull('http://localhost:11434/v1', 'nuextract');
    expect(out).toBe(body);
    expect(fetchFn.mock.calls[0][0]).toBe('http://localhost:11434/api/pull');

    const init = fetchFn.mock.calls[0][1];
    expect(JSON.parse(init.body)).toEqual({ model: 'nuextract', stream: true });
  });
});
|
||||
@@ -0,0 +1,116 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
|
||||
// Mocks are created through vi.hoisted so they already exist when the
// vi.mock factories below run.

// Config resolver stub; its return value is programmed per test.
const { resolveLlmConfig } = vi.hoisted(() => ({ resolveLlmConfig: vi.fn() }));
vi.mock('../../../../src/nest/llm-parse/llm-config.resolver', () => ({ resolveLlmConfig }));

// Client factory stub: every created client shares the single `extract` mock.
const { createLlmClient, extract } = vi.hoisted(() => {
  const extract = vi.fn();
  return { createLlmClient: vi.fn(() => ({ extract })), extract };
});
vi.mock('../../../../src/nest/llm-parse/llm-client.factory', () => ({ createLlmClient }));

// Partial mock of text-extract: only `extractText` is replaced, the module's
// other exports keep their real implementations.
const { extractText } = vi.hoisted(() => ({ extractText: vi.fn(async () => 'Flight AB123') }));
vi.mock('../../../../src/nest/llm-parse/text-extract', async (orig) => {
  const actual = await orig() as Record<string, unknown>;
  return { ...actual, extractText };
});
|
||||
|
||||
import { LlmParseService } from '../../../../src/nest/llm-parse/llm-parse.service';
|
||||
|
||||
/** Builds a resolved-config fixture; keys in `over` replace the defaults. */
const cfg = (over: Record<string, unknown> = {}) => {
  const defaults = { provider: 'openai', model: 'm', multimodal: false };
  return { ...defaults, ...over };
};
|
||||
/** Creates a fresh LlmParseService for each call. */
const svc = () => {
  return new LlmParseService();
};
|
||||
/** Builds a parse input: `body` encoded into a Buffer plus the original file name. */
const file = (name: string, body = 'Flight AB123') => {
  const buffer = Buffer.from(body);
  return { buffer, originalName: name };
};
|
||||
|
||||
// Baseline for every test: a default config resolves, the client returns one
// FlightReservation node, and text extraction yields 'Flight AB123'.
beforeEach(() => {
  vi.clearAllMocks();
  resolveLlmConfig.mockReturnValue(cfg());
  extract.mockResolvedValue([{ '@type': 'FlightReservation' }]);
  extractText.mockResolvedValue('Flight AB123');
});
|
||||
|
||||
// Exercises LlmParseService with the config resolver, client factory and text
// extraction mocked: availability, input selection (text vs. native file),
// result normalisation and failure handling.
describe('LlmParseService', () => {
  it('isAvailable reflects whether a config resolves', () => {
    // First call sees no config; the second falls back to the beforeEach default.
    resolveLlmConfig.mockReturnValueOnce(null);
    expect(svc().isAvailable(1)).toBe(false);
    expect(svc().isAvailable(1)).toBe(true);
  });

  it('returns a not-configured warning when no config resolves', async () => {
    resolveLlmConfig.mockReturnValue(null);
    const res = await svc().parse(file('a.txt'), 1);
    expect(res.kiItems).toEqual([]);
    expect(res.warnings[0]).toMatch(/not configured/i);
    expect(extract).not.toHaveBeenCalled();
  });

  it('sends extracted text for a text-like file', async () => {
    const res = await svc().parse(file('a.txt'), 1);
    expect(res.kiItems).toEqual([{ '@type': 'FlightReservation' }]);
    const input = extract.mock.calls[0][0];
    expect(input.text).toBe('Flight AB123');
    expect(input.file).toBeUndefined();
  });

  it('extracts text for a pdf on the OpenAI-compatible/local path (no native bytes)', async () => {
    extractText.mockResolvedValue('Hotel X');
    await svc().parse(file('a.pdf', '%PDF'), 1);
    const input = extract.mock.calls[0][0];
    expect(input.text).toBe('Hotel X');
    expect(input.file).toBeUndefined();
  });

  it('sends a pdf as native bytes only for Anthropic', async () => {
    resolveLlmConfig.mockReturnValue(cfg({ provider: 'anthropic' }));
    await svc().parse(file('a.pdf', '%PDF'), 1);
    const input = extract.mock.calls[0][0];
    expect(input.file).toEqual({ mimeType: 'application/pdf', data: expect.any(Buffer) });
    expect(input.text).toBeUndefined();
    expect(extractText).not.toHaveBeenCalled();
  });

  it('warns when a pdf yields no readable text (e.g. a scan)', async () => {
    // Whitespace-only extraction result.
    extractText.mockResolvedValue('   ');
    const res = await svc().parse(file('a.pdf', '%PDF'), 1);
    expect(res.kiItems).toEqual([]);
    expect(res.warnings[0]).toMatch(/no readable text/i);
    expect(extract).not.toHaveBeenCalled();
  });

  it('folds flattened type fields into reservationFor (small-model output)', async () => {
    extract.mockResolvedValue([{
      '@type': 'FlightReservation',
      reservationNumber: 'ABC',
      flightNumber: 'EZY1357',
      airline: { iataCode: 'EG' },
      departureAirport: { iataCode: 'GEG' },
      arrivalAirport: { iataCode: 'AMS' },
      departureTime: '2026-06-11T10:00:00',
    }]);
    const res = await svc().parse(file('a.txt'), 1);
    const item = res.kiItems[0] as any;
    expect(item.reservationNumber).toBe('ABC');
    expect(item.reservationFor).toMatchObject({ flightNumber: 'EZY1357', departureAirport: { iataCode: 'GEG' } });
    // root-level keys are not duplicated into reservationFor
    expect(item.reservationFor.reservationNumber).toBeUndefined();
  });

  it('leaves already-nested reservationFor untouched', async () => {
    extract.mockResolvedValue([{ '@type': 'FlightReservation', reservationFor: { flightNumber: 'X1' } }]);
    const res = await svc().parse(file('a.txt'), 1);
    expect((res.kiItems[0] as any).reservationFor).toEqual({ flightNumber: 'X1' });
  });

  it('drops nodes without a string @type and warns', async () => {
    extract.mockResolvedValue([{ '@type': 'FlightReservation' }, { foo: 'bar' }]);
    const res = await svc().parse(file('a.txt'), 1);
    expect(res.kiItems).toEqual([{ '@type': 'FlightReservation' }]);
    expect(res.warnings.some(w => /unrecognized/i.test(w))).toBe(true);
  });

  it('degrades to a warning when the client throws', async () => {
    extract.mockRejectedValue(new Error('boom'));
    const res = await svc().parse(file('a.txt'), 1);
    expect(res.kiItems).toEqual([]);
    expect(res.warnings[0]).toMatch(/AI parsing failed/i);
  });
});
|
||||
@@ -0,0 +1,26 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { buildSystemPrompt, KI_RESERVATION_JSON_SCHEMA } from '../../../../src/nest/llm-parse/llm-prompt';
|
||||
import { KI_RESERVATION_TYPES } from '@trek/shared';
|
||||
|
||||
// Checks that the system prompt and the JSON schema stay in sync with the
// shared list of reservation types.
describe('llm-prompt', () => {
  it('names every recognized @type the mapper supports', () => {
    const prompt = buildSystemPrompt();
    for (const t of KI_RESERVATION_TYPES) expect(prompt).toContain(t);
  });

  it('instructs JSON-only output wrapped in reservations', () => {
    const prompt = buildSystemPrompt();
    expect(prompt).toMatch(/"reservations"/);
    expect(prompt.toLowerCase()).toContain('iso 8601');
  });

  it('exposes a strict-safe object-root JSON schema enumerating the types', () => {
    const schema = KI_RESERVATION_JSON_SCHEMA as any;
    expect(schema.type).toBe('object');
    expect(schema.additionalProperties).toBe(false);
    expect(schema.required).toContain('reservations');

    // Schema for one element of the `reservations` array.
    const item = schema.properties.reservations.items;
    expect(item.properties['@type'].enum).toEqual([...KI_RESERVATION_TYPES]);
    expect(item.required).toContain('@type');
  });
});
|
||||
@@ -0,0 +1,40 @@
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
|
||||
// `getText` is created through vi.hoisted so it already exists when the
// vi.mock factory below runs; it resolves to a fixed text layer.
const { getText } = vi.hoisted(() => ({ getText: vi.fn(async () => ({ text: 'Hotel X — confirmation ABC' })) }));

// Replace pdf-parse's PDFParse with a stub class: every instance shares the
// `getText` mock above and gets its own no-op async `destroy`.
vi.mock('pdf-parse', () => ({
  PDFParse: class {
    getText = getText;
    destroy = vi.fn(async () => {});
  },
}));
|
||||
|
||||
import { isTextLike, isPdf, extractText } from '../../../../src/nest/llm-parse/text-extract';
|
||||
|
||||
// Covers file-type classification by extension and text extraction for plain
// text, HTML and PDF input (pdf-parse is mocked).
describe('text-extract', () => {
  it('classifies text-like and pdf extensions', () => {
    expect(isTextLike('a.txt')).toBe(true);
    expect(isTextLike('a.html')).toBe(true);
    expect(isTextLike('a.eml')).toBe(true);
    expect(isTextLike('a.pdf')).toBe(false);
    // Upper-case extension is still recognised as a PDF.
    expect(isPdf('a.PDF')).toBe(true);
    expect(isPdf('a.txt')).toBe(false);
  });

  it('decodes plain text', async () => {
    expect(await extractText(Buffer.from('hello world'), 'a.txt')).toBe('hello world');
  });

  it('strips markup from html/eml', async () => {
    const html = '<html><style>x{}</style><body><p>Flight AB123</p><script>1</script></body></html>';
    const out = await extractText(Buffer.from(html), 'a.html');
    expect(out).toContain('Flight AB123');
    expect(out).not.toContain('<p>');
    // The <style> body must be removed too, not just the tags.
    expect(out).not.toContain('x{}');
  });

  it('extracts the embedded text layer from a pdf', async () => {
    const out = await extractText(Buffer.from('%PDF-1.4'), 'a.pdf');
    expect(out).toBe('Hotel X — confirmation ABC');
    expect(getText).toHaveBeenCalled();
  });
});
|
||||
Reference in New Issue
Block a user