From 156b8da37e3f7edb2e9fbfff50bc225eb25e0663 Mon Sep 17 00:00:00 2001 From: Maurice Date: Thu, 25 Jun 2026 10:27:01 +0200 Subject: [PATCH] feat(extract): drive NuExtract with its native template MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NuExtract isn't an instruct model — fed a plain chat prompt it just echoes the schema back. Detect a NuExtract model by id and talk to it the way the model cards document: the JSON template inlined in a single user message, no system prompt, no json_schema, temperature 0. Its flat result is mapped back to the same KiReservation shape the rest of the pipeline already uses, so nothing downstream changes; every other model keeps the generic prompt. Money is taken as a verbatim string and parsed locally (German "1.580,22 €" otherwise comes back as 1.49772), a rental car's pickup/return ride the from/to fields so a stray form label doesn't become the location, and a lodging with no name falls back to its address instead of being dropped. --- .../src/nest/llm-parse/clients/nuextract.ts | 274 ++++++++++++++++++ .../clients/openai-compatible.client.ts | 66 +++-- .../tests/unit/nest/llm-parse/clients.test.ts | 47 +++ .../unit/nest/llm-parse/nuextract.test.ts | 168 +++++++++++ 4 files changed, 534 insertions(+), 21 deletions(-) create mode 100644 server/src/nest/llm-parse/clients/nuextract.ts create mode 100644 server/tests/unit/nest/llm-parse/nuextract.test.ts diff --git a/server/src/nest/llm-parse/clients/nuextract.ts b/server/src/nest/llm-parse/clients/nuextract.ts new file mode 100644 index 00000000..4b4ec466 --- /dev/null +++ b/server/src/nest/llm-parse/clients/nuextract.ts @@ -0,0 +1,274 @@ +/** + * NuExtract adapter for the OpenAI-compatible client. + * + * NuExtract (NuMind) is not an instruct model — it is fine-tuned to fill a JSON + * *template* whose leaf values are type tokens ("verbatim-string", "date-time", + * …). 
Fed a generic chat instruction it just echoes the schema back, which is + * why a plain prompt produces garbage. Run through Ollama/llama.cpp the template + * has to be embedded INLINE in the user message under a `# Template:` header + * (llama.cpp ignores vLLM's chat_template_kwargs), with temperature 0. + * + * Rather than ask NuExtract for the nested schema.org shape (its template format + * can't express per-@type conditional fields), we give it ONE flat union template + * — its sweet spot — and map the flat result back into the `KiReservation` shape + * the kitinerary mapper consumes, so the whole downstream pipeline is unchanged. + */ + +/** Detect a NuExtract model id (e.g. `hf.co/numind/NuExtract-2.0-2B-GGUF`, `nuextract`). */ +export function isNuExtractModel(model: string | undefined): boolean { + return !!model && /nuextract/i.test(model); +} + +/** + * Flat union template covering every reservation type. NuExtract fills the + * relevant fields and returns the rest as null, so one template serves all docs. + * + * Deliberately flat (a single reservation, not an array). A small NuExtract (the + * 2B) returns an empty result when handed a nested `{ reservations: [ … ] }` + * array-of-objects template, but extracts reliably from a single flat object — + * so this path yields one reservation per document. Multi-segment itineraries + * (round trips) are left to the generic instruct path (qwen/cloud), which the + * system prompt already drives to emit every leg. + */ +export const NUEXTRACT_TEMPLATE = { + type: ['flight', 'train', 'bus', 'ferry', 'car', 'hotel', 'restaurant', 'event'], + name: 'verbatim-string', + booking_reference: 'verbatim-string', + operator: 'verbatim-string', + vehicle_number: 'verbatim-string', + // Departure/arrival double as a rental car's pick-up/return (place + time) — a + // separate pickup_location field only tempted the model to grab a nearby form + // label ("Location Terminal") instead of the actual depot. 
+ from_name: 'verbatim-string', + from_code: 'verbatim-string', + to_name: 'verbatim-string', + to_code: 'verbatim-string', + departure_time: 'date-time', + arrival_time: 'date-time', + address: 'verbatim-string', + checkin_time: 'date-time', + checkout_time: 'date-time', + start_time: 'date-time', + end_time: 'date-time', + telephone: 'verbatim-string', + website: 'verbatim-string', + seat: 'verbatim-string', + travel_class: 'verbatim-string', + platform: 'verbatim-string', + // Verbatim so we parse the localized number ourselves — asking the model for a + // JSON number turns "1.580,22 €" (German thousands/decimal) into 1.49772. + price: 'verbatim-string', + currency: 'verbatim-string', +}; + +/** + * Build the NuExtract user-turn text: the template (pretty-printed with the + * indent the model cards use) followed by the document, under a `# Template:` + * header. This is the exact inline format the GGUF model cards document. + */ +export function buildNuExtractUserText(documentText: string): string { + return `# Template:\n${JSON.stringify(NUEXTRACT_TEMPLATE, null, 4)}\n${documentText}`; +} + +/** NuExtract `type` token → schema.org reservation `@type`. */ +const TYPE_MAP: Record = { + flight: 'FlightReservation', + train: 'TrainReservation', + bus: 'BusReservation', + ferry: 'BoatReservation', + boat: 'BoatReservation', + cruise: 'BoatReservation', + car: 'RentalCarReservation', + hotel: 'LodgingReservation', + lodging: 'LodgingReservation', + restaurant: 'FoodEstablishmentReservation', + event: 'EventReservation', +}; + +/** Recursively drop null/undefined/blank leaves and the empty objects/arrays they leave behind. */ +function clean(value: unknown): unknown { + if (Array.isArray(value)) { + const arr = value.map(clean).filter((v) => v !== undefined); + return arr.length ? 
arr : undefined; + } + if (value && typeof value === 'object') { + const out: Record = {}; + for (const [k, v] of Object.entries(value)) { + const c = clean(v); + if (c !== undefined) out[k] = c; + } + return Object.keys(out).length ? out : undefined; + } + if (value === null || value === undefined) return undefined; + if (typeof value === 'string' && value.trim() === '') return undefined; + return value; +} + +/** + * Parse a localized money string into a plain number. Handles German + * ("1.580,22 €" → 1580.22) and English ("1,580.22" → 1580.22, "$89.00" → 89) + * grouping: with both separators present the right-most is the decimal point; a + * lone one is decimal only if it occurs once with ≤2 trailing digits. Null when nothing parses. + */ +function parseAmount(raw: unknown): number | null { + if (typeof raw === 'number') return Number.isFinite(raw) ? raw : null; + if (typeof raw !== 'string') return null; + let s = raw.replace(/[^\d.,]/g, ''); + if (!s) return null; + const lastComma = s.lastIndexOf(','); + const lastDot = s.lastIndexOf('.'); + let decimal: ',' | '.' | null = null; + if (lastComma > -1 && lastDot > -1) { + decimal = lastComma > lastDot ? ',' : '.'; + } else if (lastComma > -1) { + // A single comma with ≤2 trailing digits is a decimal point; otherwise grouping. + const parts = s.split(','); + decimal = parts.length === 2 && parts[1].length <= 2 ? ',' : null; + } else if (lastDot > -1) { + const parts = s.split('.'); + decimal = parts.length === 2 && parts[1].length <= 2 ? '.' : null; + } + if (decimal) { + const grouping = decimal === ',' ? '.' : ','; + s = s.split(grouping).join('').replace(decimal, '.'); + } else { + s = s.replace(/[.,]/g, ''); + } + const n = Number(s); + return Number.isFinite(n) ? n : null; +} + +/** Resolve an ISO 4217 currency from a symbol or code found in either field. 
*/ +function parseCurrency(...candidates: unknown[]): string | undefined { + for (const c of candidates) { + if (typeof c !== 'string') continue; + const s = c.toUpperCase(); + if (s.includes('€') || /\bEUR\b/.test(s)) return 'EUR'; + if (s.includes('£') || /\bGBP\b/.test(s)) return 'GBP'; + if (s.includes('$') || /\bUSD\b/.test(s)) return 'USD'; + const iso = s.match(/\b([A-Z]{3})\b/); + if (iso) return iso[1]; + } + return undefined; +} + +/** A venue's display name, falling back to the address (or a generic label) so a + * lodging/restaurant/event is never silently dropped when the model misses the name. */ +function nameOrFallback(x: Record, fallback: string): string { + const name = typeof x.name === 'string' ? x.name.trim() : ''; + if (name) return name; + const address = typeof x.address === 'string' ? x.address.trim() : ''; + if (address) return address.split(',')[0].trim(); + return fallback; +} + +/** Map one flat NuExtract reservation into a schema.org `KiReservation` node (or undefined). */ +function buildNode(x: Record): Record | undefined { + const atType = TYPE_MAP[String(x.type ?? '').toLowerCase().trim()]; + if (!atType) return undefined; + + const node: Record = { + '@type': atType, + reservationNumber: x.booking_reference, + seat: x.seat, + class: x.travel_class, + platform: x.platform, + price: parseAmount(x.price) ?? undefined, + priceCurrency: parseCurrency(x.currency, x.price), + }; + + switch (atType) { + case 'FlightReservation': + node.reservationFor = { + flightNumber: x.vehicle_number, + airline: x.operator ? 
{ name: x.operator } : undefined, + departureAirport: { iataCode: x.from_code, name: x.from_name }, + arrivalAirport: { iataCode: x.to_code, name: x.to_name }, + departureTime: x.departure_time, + arrivalTime: x.arrival_time, + }; + break; + case 'TrainReservation': + node.reservationFor = { + trainNumber: x.vehicle_number, + departureStation: { name: x.from_name }, + arrivalStation: { name: x.to_name }, + departureTime: x.departure_time, + arrivalTime: x.arrival_time, + }; + break; + case 'BusReservation': + node.reservationFor = { + busNumber: x.vehicle_number, + departureBusStop: { name: x.from_name }, + arrivalBusStop: { name: x.to_name }, + departureTime: x.departure_time, + arrivalTime: x.arrival_time, + }; + break; + case 'BoatReservation': + node.reservationFor = { + name: x.name ?? x.operator, + departureBoatTerminal: { name: x.from_name }, + arrivalBoatTerminal: { name: x.to_name }, + departureTime: x.departure_time, + arrivalTime: x.arrival_time, + }; + break; + case 'LodgingReservation': + node.reservationFor = { name: nameOrFallback(x, 'Accommodation'), address: x.address, telephone: x.telephone, url: x.website }; + node.checkinTime = x.checkin_time; + node.checkoutTime = x.checkout_time; + break; + case 'FoodEstablishmentReservation': + node.reservationFor = { name: nameOrFallback(x, 'Restaurant'), address: x.address, telephone: x.telephone, url: x.website }; + node.startTime = x.start_time; + node.endTime = x.end_time; + break; + case 'RentalCarReservation': + // Pick-up / return ride the transport from/to fields (see template comment). + node.reservationFor = { name: x.name, rentalCompany: x.operator ? 
{ name: x.operator } : undefined }; + node.pickupTime = x.departure_time; + node.dropoffTime = x.arrival_time; + node.pickupLocation = { name: x.from_name, address: x.address }; + node.dropoffLocation = { name: x.to_name }; + break; + case 'EventReservation': + node.reservationFor = { + name: nameOrFallback(x, 'Event'), + startDate: x.start_time, + endDate: x.end_time, + location: { address: x.address, telephone: x.telephone, url: x.website }, + }; + node.startTime = x.start_time; + node.endTime = x.end_time; + break; + } + + return clean(node) as Record | undefined; +} + +/** + * Convert a parsed NuExtract response into schema.org `KiReservation` nodes. + * Accepts the single flat object the template asks for, a bare array, or a + * `{ reservations: [...] }` wrapper. Unrecognized/empty entries are dropped. + */ +export function nuExtractToKiReservations(parsed: unknown): Record[] { + const wrapped = (parsed as { reservations?: unknown })?.reservations; + const list = Array.isArray(wrapped) + ? wrapped + : Array.isArray(parsed) + ? parsed + : parsed && typeof parsed === 'object' + ? [parsed] + : []; + + const out: Record[] = []; + for (const entry of list) { + if (entry && typeof entry === 'object') { + const node = buildNode(entry as Record); + if (node) out.push(node); + } + } + return out; +} diff --git a/server/src/nest/llm-parse/clients/openai-compatible.client.ts b/server/src/nest/llm-parse/clients/openai-compatible.client.ts index d996c573..a42a2aa5 100644 --- a/server/src/nest/llm-parse/clients/openai-compatible.client.ts +++ b/server/src/nest/llm-parse/clients/openai-compatible.client.ts @@ -1,4 +1,5 @@ import type { LlmExtractionClient, LlmExtractionInput } from '../llm-provider.interface'; +import { isNuExtractModel, buildNuExtractUserText, nuExtractToKiReservations } from './nuextract'; // Generous: a local CPU model (Ollama, no GPU) may cold-load several GB and then // take a few minutes on a longer document before the first token. 
@@ -11,19 +12,25 @@ const MAX_TOKENS = 4096; * which all expose `POST {baseUrl}/chat/completions`. Native binaries (PDF) are * sent as an OpenAI `file` content part; text goes as a text part. Uses the * global fetch (no SDK) to match the codebase's HTTP style. + * + * A NuExtract model (detected by id) takes a different request shape: the JSON + * template inlined in a single user message, no system prompt and no + * `response_format` (see ./nuextract.ts) — that's how the fine-tune expects to + * be driven; the generic instruct path applies to every other model. */ export class OpenAiCompatibleClient implements LlmExtractionClient { async extract(input: LlmExtractionInput): Promise[]> { const base = (input.baseUrl ?? 'https://api.openai.com/v1').replace(/\/+$/, ''); const url = `${base}/chat/completions`; + const nuextract = isNuExtractModel(input.model); - const userContent: unknown[] = [ - { type: 'text', text: input.text ? `${USER_TEXT}\n\n${input.text}` : USER_TEXT }, - ]; + const userContent: unknown[] = nuextract + ? [{ type: 'text', text: buildNuExtractUserText(input.text ?? '') }] + : [{ type: 'text', text: input.text ? `${USER_TEXT}\n\n${input.text}` : USER_TEXT }]; // Only genuine images go natively (as image_url) — OpenAI-compatible servers // (notably Ollama) reject `file`/PDF content parts. PDFs reach this client as // pre-extracted text (see llm-parse.service.ts), never as bytes. - if (input.file && input.file.mimeType.startsWith('image/')) { + if (!nuextract && input.file && input.file.mimeType.startsWith('image/')) { const b64 = input.file.data.toString('base64'); userContent.push({ type: 'image_url', @@ -37,14 +44,22 @@ export class OpenAiCompatibleClient implements LlmExtractionClient { // Extraction is a deterministic task — Ollama defaults to 0.7, which makes // small models (NuExtract) drop fields or return empty. Pin to 0. 
temperature: 0, - messages: [ - { role: 'system', content: input.prompt }, - { role: 'user', content: userContent }, - ], - response_format: { - type: 'json_schema', - json_schema: { name: 'reservations', schema: input.jsonSchema, strict: false }, - }, + // NuExtract wants the template (in the user turn) to be the only instruction + // — a system prompt or a json_schema grammar derails it. + messages: nuextract + ? [{ role: 'user', content: userContent }] + : [ + { role: 'system', content: input.prompt }, + { role: 'user', content: userContent }, + ], + ...(nuextract + ? {} + : { + response_format: { + type: 'json_schema' as const, + json_schema: { name: 'reservations', schema: input.jsonSchema, strict: false }, + }, + }), }; const controller = new AbortController(); @@ -73,22 +88,31 @@ export class OpenAiCompatibleClient implements LlmExtractionClient { choices?: { message?: { content?: string } }[]; }; const content = data.choices?.[0]?.message?.content; - return parseReservations(content); + return nuextract ? parseNuExtract(content) : parseReservations(content); } } +/** Strip code fences and JSON.parse; `null` on failure. */ +function parseJson(content: string | undefined | null): unknown { + if (!content) return null; + const stripped = content.trim().replace(/^```(?:json)?/i, '').replace(/```$/, '').trim(); + try { + return JSON.parse(stripped); + } catch { + return null; + } +} + +/** Parse a NuExtract response and map its flat template output to KiReservation nodes. */ +function parseNuExtract(content: string | undefined | null): Record[] { + return nuExtractToKiReservations(parseJson(content)); +} + const USER_TEXT = 'Extract every travel reservation from the following document as schema.org JSON-LD.'; /** Tolerant parse: strip code fences, JSON.parse, pull `reservations`. `[]` on failure. 
*/ function parseReservations(content: string | undefined | null): Record[] { - if (!content) return []; - const stripped = content.trim().replace(/^```(?:json)?/i, '').replace(/```$/, '').trim(); - let parsed: unknown; - try { - parsed = JSON.parse(stripped); - } catch { - return []; - } + const parsed = parseJson(content); if (Array.isArray(parsed)) return parsed as Record[]; if (parsed && typeof parsed === 'object' && Array.isArray((parsed as { reservations?: unknown }).reservations)) { return (parsed as { reservations: Record[] }).reservations; diff --git a/server/tests/unit/nest/llm-parse/clients.test.ts b/server/tests/unit/nest/llm-parse/clients.test.ts index e5e20e61..01d0b081 100644 --- a/server/tests/unit/nest/llm-parse/clients.test.ts +++ b/server/tests/unit/nest/llm-parse/clients.test.ts @@ -64,6 +64,53 @@ describe('OpenAiCompatibleClient', () => { }); }); +describe('OpenAiCompatibleClient — NuExtract path', () => { + it('inlines the template in one user message (no system, no response_format) and maps the flat result', async () => { + const fetchFn = mockFetch(() => + jsonResponse({ + choices: [ + { + message: { + content: JSON.stringify({ + reservations: [ + { type: 'hotel', name: 'B&B Hotel', booking_reference: '733', checkin_time: '2026-05-01T15:00:00', checkout_time: '2026-05-02T12:00:00' }, + ], + }), + }, + }, + ], + }), + ); + const out = await new OpenAiCompatibleClient().extract({ ...baseInput, model: 'hf.co/numind/NuExtract-2.0-2B-GGUF:latest', text: 'Hotel doc' }); + + expect(out).toEqual([ + { + '@type': 'LodgingReservation', + reservationNumber: '733', + reservationFor: { name: 'B&B Hotel' }, + checkinTime: '2026-05-01T15:00:00', + checkoutTime: '2026-05-02T12:00:00', + }, + ]); + + const body = JSON.parse((fetchFn.mock.calls[0][1] as RequestInit).body as string); + expect(body.messages).toHaveLength(1); + expect(body.messages[0].role).toBe('user'); + expect(body.messages[0].content[0].text.startsWith('# Template:')).toBe(true); + 
expect(body.messages[0].content[0].text.endsWith('Hotel doc')).toBe(true); + expect(body.temperature).toBe(0); + expect(body.response_format).toBeUndefined(); + }); + + it('keeps the system prompt and response_format for non-NuExtract models', async () => { + const fetchFn = mockFetch(() => jsonResponse({ choices: [{ message: { content: '{"reservations":[]}' } }] })); + await new OpenAiCompatibleClient().extract({ ...baseInput, model: 'qwen2.5:7b' }); + const body = JSON.parse((fetchFn.mock.calls[0][1] as RequestInit).body as string); + expect(body.messages[0].role).toBe('system'); + expect(body.response_format).toBeDefined(); + }); +}); + describe('AnthropicClient', () => { it('forces the emit_reservations tool and reads its input', async () => { const fetchFn = mockFetch(() => diff --git a/server/tests/unit/nest/llm-parse/nuextract.test.ts b/server/tests/unit/nest/llm-parse/nuextract.test.ts new file mode 100644 index 00000000..fed00b76 --- /dev/null +++ b/server/tests/unit/nest/llm-parse/nuextract.test.ts @@ -0,0 +1,168 @@ +import { describe, it, expect } from 'vitest'; +import { + isNuExtractModel, + buildNuExtractUserText, + nuExtractToKiReservations, + NUEXTRACT_TEMPLATE, +} from '../../../../src/nest/llm-parse/clients/nuextract'; + +describe('isNuExtractModel', () => { + it('matches NuExtract ids case-insensitively', () => { + expect(isNuExtractModel('hf.co/numind/NuExtract-2.0-2B-GGUF:latest')).toBe(true); + expect(isNuExtractModel('hf.co/numind/NuExtract3-GGUF:Q4_K_M')).toBe(true); + expect(isNuExtractModel('nuextract')).toBe(true); + }); + it('does not match generic instruct models', () => { + expect(isNuExtractModel('qwen2.5:7b')).toBe(false); + expect(isNuExtractModel('gpt-4o')).toBe(false); + expect(isNuExtractModel(undefined)).toBe(false); + }); +}); + +describe('buildNuExtractUserText', () => { + it('inlines the template under a "# Template:" header followed by the document', () => { + const text = buildNuExtractUserText('Hotel confirmation 123'); + 
expect(text.startsWith('# Template:\n')).toBe(true); + expect(text).toContain('"verbatim-string"'); + expect(text).toContain(JSON.stringify(NUEXTRACT_TEMPLATE, null, 4)); + expect(text.endsWith('Hotel confirmation 123')).toBe(true); + }); +}); + +describe('nuExtractToKiReservations', () => { + it('maps a flat flight into a schema.org FlightReservation with from/to airports', () => { + const out = nuExtractToKiReservations({ + reservations: [ + { + type: 'flight', + name: 'LH 198', + booking_reference: '7XK2QP', + operator: 'Lufthansa', + vehicle_number: 'LH198', + from_name: 'Berlin Brandenburg (BER)', + from_code: 'BER', + to_name: 'Frankfurt am Main (FRA)', + to_code: 'FRA', + departure_time: '2026-07-12T08:35:00', + arrival_time: '2026-07-12T09:50:00', + pickup_location: null, + seat: '14A', + travel_class: 'Economy', + platform: null, + price: 149, + currency: 'EUR', + }, + ], + }); + expect(out).toEqual([ + { + '@type': 'FlightReservation', + reservationNumber: '7XK2QP', + seat: '14A', + class: 'Economy', + price: 149, + priceCurrency: 'EUR', + reservationFor: { + flightNumber: 'LH198', + airline: { name: 'Lufthansa' }, + departureAirport: { iataCode: 'BER', name: 'Berlin Brandenburg (BER)' }, + arrivalAirport: { iataCode: 'FRA', name: 'Frankfurt am Main (FRA)' }, + departureTime: '2026-07-12T08:35:00', + arrivalTime: '2026-07-12T09:50:00', + }, + }, + ]); + }); + + it('maps a hotel with check-in/out at the reservation root', () => { + const [node] = nuExtractToKiReservations({ + reservations: [ + { + type: 'hotel', + name: 'B&B Hotel Berlin-Airport', + booking_reference: '73365505188894', + address: 'Bertolt-Brecht-Allee 12, 12529 Schoenefeld', + checkin_time: '2026-05-01T15:00:00', + checkout_time: '2026-05-02T12:00:00', + from_name: null, + price: 89, + currency: 'EUR', + }, + ], + }); + expect(node).toEqual({ + '@type': 'LodgingReservation', + reservationNumber: '73365505188894', + price: 89, + priceCurrency: 'EUR', + reservationFor: { name: 'B&B Hotel 
Berlin-Airport', address: 'Bertolt-Brecht-Allee 12, 12529 Schoenefeld' }, + checkinTime: '2026-05-01T15:00:00', + checkoutTime: '2026-05-02T12:00:00', + }); + }); + + it('maps a rental car — pickup/return ride the from/to fields, money is parsed', () => { + const [node] = nuExtractToKiReservations([ + { + type: 'car', + name: 'VW Golf', + operator: 'SICILY BY CAR', + booking_reference: 'CAR1', + from_name: 'Catania Airport', + to_name: 'Palermo Airport', + departure_time: '2026-12-24T10:00:00', + arrival_time: '2026-12-29T10:00:00', + address: 'Via Roma 1', + price: '€215,50', + currency: '€', + }, + ]); + expect(node).toEqual({ + '@type': 'RentalCarReservation', + reservationNumber: 'CAR1', + price: 215.5, + priceCurrency: 'EUR', + reservationFor: { name: 'VW Golf', rentalCompany: { name: 'SICILY BY CAR' } }, + pickupTime: '2026-12-24T10:00:00', + dropoffTime: '2026-12-29T10:00:00', + pickupLocation: { name: 'Catania Airport', address: 'Via Roma 1' }, + dropoffLocation: { name: 'Palermo Airport' }, + }); + }); + + it('parses localized money strings and currency symbols', () => { + const [de] = nuExtractToKiReservations({ type: 'hotel', name: 'X', price: '1.580,22 €' }); + expect(de.price).toBe(1580.22); + expect(de.priceCurrency).toBe('EUR'); + const [en] = nuExtractToKiReservations({ type: 'hotel', name: 'Y', price: '$1,580.22' }); + expect(en.price).toBe(1580.22); + expect(en.priceCurrency).toBe('USD'); + const [plain] = nuExtractToKiReservations({ type: 'hotel', name: 'Z', price: 'EUR 89,00' }); + expect(plain.price).toBe(89); + expect(plain.priceCurrency).toBe('EUR'); + }); + + it('falls back to the address instead of dropping a nameless lodging', () => { + const [node] = nuExtractToKiReservations({ + type: 'hotel', + booking_reference: 'HMHJ9RTEEK', + address: "Via Aldo Moro, 47 n. 
15, Quarto d'Altino", + }); + expect(node['@type']).toBe('LodgingReservation'); + expect((node.reservationFor as Record).name).toBe('Via Aldo Moro'); + }); + + it('accepts a bare object and drops unknown types', () => { + expect(nuExtractToKiReservations({ type: 'flight', from_name: 'A', to_name: 'B' })).toEqual([ + { + '@type': 'FlightReservation', + reservationFor: { + departureAirport: { name: 'A' }, + arrivalAirport: { name: 'B' }, + }, + }, + ]); + expect(nuExtractToKiReservations({ reservations: [{ type: 'spaceship' }] })).toEqual([]); + expect(nuExtractToKiReservations(null)).toEqual([]); + }); +});