mirror of
https://github.com/mauriceboe/TREK.git
synced 2026-06-28 01:31:47 +00:00
feat(extract): drive NuExtract with its native template
NuExtract isn't an instruct model — fed a plain chat prompt it just echoes the schema back. Detect a NuExtract model by id and talk to it the way the model cards document: the JSON template inlined in a single user message, no system prompt, no json_schema, temperature 0. Its flat result is mapped back to the same KiReservation shape the rest of the pipeline already uses, so nothing downstream changes; every other model keeps the generic prompt. Money is taken as a verbatim string and parsed locally (German "1.580,22 €" otherwise comes back as 1.49772), a rental car's pickup/return ride the from/to fields so a stray form label doesn't become the location, and a lodging with no name falls back to its address instead of being dropped.
This commit is contained in:
@@ -0,0 +1,274 @@
|
||||
/**
|
||||
* NuExtract adapter for the OpenAI-compatible client.
|
||||
*
|
||||
* NuExtract (NuMind) is not an instruct model — it is fine-tuned to fill a JSON
|
||||
* *template* whose leaf values are type tokens ("verbatim-string", "date-time",
|
||||
* …). Fed a generic chat instruction it just echoes the schema back, which is
|
||||
* why a plain prompt produces garbage. Run through Ollama/llama.cpp the template
|
||||
* has to be embedded INLINE in the user message under a `# Template:` header
|
||||
* (llama.cpp ignores vLLM's chat_template_kwargs), with temperature 0.
|
||||
*
|
||||
* Rather than ask NuExtract for the nested schema.org shape (its template format
|
||||
* can't express per-@type conditional fields), we give it ONE flat union template
|
||||
* — its sweet spot — and map the flat result back into the `KiReservation` shape
|
||||
* the kitinerary mapper consumes, so the whole downstream pipeline is unchanged.
|
||||
*/
|
||||
|
||||
/**
 * Report whether a model id names a NuExtract fine-tune.
 *
 * The check is a case-insensitive substring match on "nuextract", so ids such
 * as `hf.co/numind/NuExtract-2.0-2B-GGUF` and a bare `nuextract` both qualify.
 *
 * @param model Model id as configured; may be undefined.
 * @returns `true` only for a non-empty id that contains "nuextract".
 */
export function isNuExtractModel(model: string | undefined): boolean {
  if (!model) return false;
  return model.search(/nuextract/i) !== -1;
}
|
||||
|
||||
/** NuExtract type token: copy the text exactly as it appears in the document. */
const VERBATIM = 'verbatim-string';
/** NuExtract type token: a date-time value. */
const DATE_TIME = 'date-time';

/**
 * The single flat template sent to NuExtract for every document, whatever the
 * reservation type. Fields that do not apply come back as null.
 *
 * It describes ONE reservation as a flat object on purpose: a small NuExtract
 * (the 2B) returns an empty result for a nested `{ reservations: [ … ] }`
 * array-of-objects template but extracts reliably from a flat object. This path
 * therefore yields one reservation per document; multi-segment itineraries
 * (round trips) are left to the generic instruct path (qwen/cloud), whose system
 * prompt already asks for every leg.
 *
 * Key order is part of the prompt (the object is serialized as-is), so keep it
 * stable.
 */
export const NUEXTRACT_TEMPLATE = {
  // Enum: the model picks one of these tokens.
  type: ['flight', 'train', 'bus', 'ferry', 'car', 'hotel', 'restaurant', 'event'],
  name: VERBATIM,
  booking_reference: VERBATIM,
  operator: VERBATIM,
  vehicle_number: VERBATIM,

  // from/to + departure/arrival also carry a rental car's pick-up/return place
  // and time. A dedicated pickup_location field only tempted the model to grab a
  // nearby form label ("Location Terminal") instead of the actual depot.
  from_name: VERBATIM,
  from_code: VERBATIM,
  to_name: VERBATIM,
  to_code: VERBATIM,
  departure_time: DATE_TIME,
  arrival_time: DATE_TIME,

  address: VERBATIM,
  checkin_time: DATE_TIME,
  checkout_time: DATE_TIME,
  start_time: DATE_TIME,
  end_time: DATE_TIME,
  telephone: VERBATIM,
  website: VERBATIM,
  seat: VERBATIM,
  travel_class: VERBATIM,
  platform: VERBATIM,

  // Kept verbatim so the localized number is parsed locally — asking the model
  // for a JSON number turns "1.580,22 €" (German thousands/decimal) into 1.49772.
  price: VERBATIM,
  currency: VERBATIM,
};
|
||||
|
||||
/**
 * Assemble the text of the single NuExtract user turn.
 *
 * Layout: a `# Template:` header line, the template pretty-printed with a
 * 4-space indent (the indent the model cards use), then the document text —
 * the inline format the GGUF model cards document.
 *
 * @param documentText Plain text of the document to extract from.
 * @returns The complete user-message text.
 */
export function buildNuExtractUserText(documentText: string): string {
  const template = JSON.stringify(NUEXTRACT_TEMPLATE, null, 4);
  return '# Template:\n' + template + '\n' + documentText;
}
|
||||
|
||||
/**
 * Lookup from a NuExtract `type` token to the schema.org reservation `@type`.
 * Besides the tokens the template offers, a few synonyms (boat, cruise,
 * lodging) are accepted.
 */
const TYPE_MAP: Record<string, string> = Object.fromEntries<string>([
  ['flight', 'FlightReservation'],
  ['train', 'TrainReservation'],
  ['bus', 'BusReservation'],
  ['ferry', 'BoatReservation'],
  ['boat', 'BoatReservation'],
  ['cruise', 'BoatReservation'],
  ['car', 'RentalCarReservation'],
  ['hotel', 'LodgingReservation'],
  ['lodging', 'LodgingReservation'],
  ['restaurant', 'FoodEstablishmentReservation'],
  ['event', 'EventReservation'],
]);
|
||||
|
||||
/**
 * Prune a value recursively: null, undefined and whitespace-only strings are
 * removed, and so is any object or array that ends up empty as a result.
 *
 * @param value Any JSON-like value.
 * @returns The pruned value, or `undefined` when nothing is left. Other
 *          primitives (numbers, booleans, non-blank strings) pass through as-is.
 */
function clean(value: unknown): unknown {
  if (value === null || value === undefined) return undefined;

  if (typeof value === 'string') {
    return value.trim() === '' ? undefined : value;
  }

  if (Array.isArray(value)) {
    const kept: unknown[] = [];
    for (const item of value) {
      const pruned = clean(item);
      if (pruned !== undefined) kept.push(pruned);
    }
    return kept.length > 0 ? kept : undefined;
  }

  if (typeof value === 'object') {
    const kept: Record<string, unknown> = {};
    for (const [key, child] of Object.entries(value)) {
      const pruned = clean(child);
      if (pruned !== undefined) kept[key] = pruned;
    }
    return Object.keys(kept).length > 0 ? kept : undefined;
  }

  return value;
}
|
||||
|
||||
/**
 * Parse a localized money string into a plain number.
 *
 * Handles German ("1.580,22 €" → 1580.22) and English ("$1,580.22" → 1580.22,
 * "$89.00" → 89) notation:
 *  - when both `,` and `.` occur, the right-most one is the decimal point and
 *    the other is grouping;
 *  - when only one kind occurs, it is the decimal point only if it appears once
 *    with at most two digits after it ("89,00"); otherwise it is grouping
 *    ("1.580", "1,580,000").
 *
 * A `,`/`.` that is not directly followed by a digit is treated as punctuation,
 * not as part of the number, so "Fr. 89.00", "89.00." and "89,- €" all parse to
 * 89 and a digit-less string such as "N.A." yields null instead of 0.
 *
 * The sign is ignored: every non-digit other than `,`/`.` is stripped.
 *
 * @param raw The value the model returned for `price` (string or number).
 * @returns The amount, or `null` when there is no parseable amount.
 */
function parseAmount(raw: unknown): number | null {
  if (typeof raw === 'number') return Number.isFinite(raw) ? raw : null;
  if (typeof raw !== 'string') return null;

  // Drop separators that no digit follows (abbreviation dots, sentence-final
  // periods, "89,-"), then everything that is neither digit nor separator.
  let s = raw.replace(/[.,](?!\d)/g, '').replace(/[^\d.,]/g, '');
  // Without a digit there is no amount; Number('') would otherwise report 0.
  if (!/\d/.test(s)) return null;

  const lastComma = s.lastIndexOf(',');
  const lastDot = s.lastIndexOf('.');
  let decimal: ',' | '.' | null = null;
  if (lastComma > -1 && lastDot > -1) {
    decimal = lastComma > lastDot ? ',' : '.';
  } else if (lastComma > -1 || lastDot > -1) {
    // Only one kind of separator: a single occurrence with ≤2 trailing digits is
    // a decimal point; anything else is grouping.
    const only = lastComma > -1 ? ',' : '.';
    const parts = s.split(only);
    decimal = parts.length === 2 && parts[1].length <= 2 ? only : null;
  }

  if (decimal) {
    const grouping = decimal === ',' ? '.' : ',';
    s = s.split(grouping).join('').replace(decimal, '.');
  } else {
    s = s.replace(/[.,]/g, '');
  }

  const n = Number(s);
  return Number.isFinite(n) ? n : null;
}
|
||||
|
||||
/**
 * Work out a currency code from the candidate values, in the order given
 * (callers pass the `currency` field first, then the raw `price` text).
 *
 * Non-string candidates are skipped. Each string is upper-cased and checked for
 * €/EUR, then £/GBP, then $/USD; failing those, the first stand-alone
 * three-letter word in it is returned as the code.
 *
 * @returns The code from the first candidate that yields one, else `undefined`.
 */
function parseCurrency(...candidates: unknown[]): string | undefined {
  const wellKnown: [string, RegExp, string][] = [
    ['€', /\bEUR\b/, 'EUR'],
    ['£', /\bGBP\b/, 'GBP'],
    ['$', /\bUSD\b/, 'USD'],
  ];

  for (const candidate of candidates) {
    if (typeof candidate !== 'string') continue;
    const upper = candidate.toUpperCase();

    for (const [symbol, codePattern, code] of wellKnown) {
      if (upper.includes(symbol) || codePattern.test(upper)) return code;
    }

    const generic = /\b([A-Z]{3})\b/.exec(upper);
    if (generic) return generic[1];
  }
  return undefined;
}
|
||||
|
||||
/**
 * Pick a display name for a venue so a lodging/restaurant/event is never left
 * nameless when the model misses the name.
 *
 * Preference order: the trimmed `name`; else the first non-empty
 * comma-separated segment of `address` (typically the street); else `fallback`.
 * Skipping empty segments matters for addresses like ", Berlin": an empty name
 * would be pruned later and the venue would end up without one after all.
 *
 * @param x        Flat NuExtract reservation; only `name` and `address` are read.
 * @param fallback Generic label used when neither field gives usable text.
 * @returns A non-empty name whenever `fallback` is non-empty.
 */
function nameOrFallback(x: Record<string, unknown>, fallback: string): string {
  const name = typeof x.name === 'string' ? x.name.trim() : '';
  if (name) return name;

  const address = typeof x.address === 'string' ? x.address : '';
  const firstSegment = address
    .split(',')
    .map((segment) => segment.trim())
    .find((segment) => segment !== '');
  return firstSegment ?? fallback;
}
|
||||
|
||||
/**
 * Map one flat NuExtract reservation into a schema.org `KiReservation` node.
 *
 * The `type` token (lower-cased, trimmed) selects the schema.org `@type` through
 * `TYPE_MAP`; a missing or unlisted token yields `undefined`. Fields common to
 * every type (reference, seat, class, platform, price, currency) sit on the
 * reservation root; type-specific ones go under `reservationFor` or next to it.
 * The assembled node is passed through `clean`, so whatever the model left
 * null/blank — and any object that ends up empty because of it — is absent from
 * the result.
 *
 * @param x One flat object in the `NUEXTRACT_TEMPLATE` shape.
 * @returns The pruned node, or `undefined` when the type is not recognized.
 */
function buildNode(x: Record<string, unknown>): Record<string, unknown> | undefined {
  const token = String(x.type ?? '').toLowerCase().trim();
  // Own-property check: a bare `TYPE_MAP[token]` would resolve tokens such as
  // "constructor" or "toString" to members inherited from Object.prototype and
  // let a non-string `@type` through.
  const atType = Object.prototype.hasOwnProperty.call(TYPE_MAP, token) ? TYPE_MAP[token] : undefined;
  if (!atType) return undefined;

  const node: Record<string, unknown> = {
    '@type': atType,
    reservationNumber: x.booking_reference,
    seat: x.seat,
    class: x.travel_class,
    platform: x.platform,
    // parseAmount reports "no amount" as null; undefined lets clean() drop the key.
    price: parseAmount(x.price) ?? undefined,
    // Prefer the dedicated field, but fall back to a symbol/code inside the price text.
    priceCurrency: parseCurrency(x.currency, x.price),
  };

  switch (atType) {
    case 'FlightReservation':
      node.reservationFor = {
        flightNumber: x.vehicle_number,
        airline: x.operator ? { name: x.operator } : undefined,
        departureAirport: { iataCode: x.from_code, name: x.from_name },
        arrivalAirport: { iataCode: x.to_code, name: x.to_name },
        departureTime: x.departure_time,
        arrivalTime: x.arrival_time,
      };
      break;
    case 'TrainReservation':
      node.reservationFor = {
        trainNumber: x.vehicle_number,
        departureStation: { name: x.from_name },
        arrivalStation: { name: x.to_name },
        departureTime: x.departure_time,
        arrivalTime: x.arrival_time,
      };
      break;
    case 'BusReservation':
      node.reservationFor = {
        busNumber: x.vehicle_number,
        departureBusStop: { name: x.from_name },
        arrivalBusStop: { name: x.to_name },
        departureTime: x.departure_time,
        arrivalTime: x.arrival_time,
      };
      break;
    case 'BoatReservation':
      node.reservationFor = {
        // The trip name, or the operator when the model gave no name.
        name: x.name ?? x.operator,
        departureBoatTerminal: { name: x.from_name },
        arrivalBoatTerminal: { name: x.to_name },
        departureTime: x.departure_time,
        arrivalTime: x.arrival_time,
      };
      break;
    case 'LodgingReservation':
      node.reservationFor = {
        name: nameOrFallback(x, 'Accommodation'),
        address: x.address,
        telephone: x.telephone,
        url: x.website,
      };
      // Check-in/out live on the reservation root, not on the lodging itself.
      node.checkinTime = x.checkin_time;
      node.checkoutTime = x.checkout_time;
      break;
    case 'FoodEstablishmentReservation':
      node.reservationFor = {
        name: nameOrFallback(x, 'Restaurant'),
        address: x.address,
        telephone: x.telephone,
        url: x.website,
      };
      node.startTime = x.start_time;
      node.endTime = x.end_time;
      break;
    case 'RentalCarReservation':
      // Pick-up / return ride the transport from/to fields (see template comment).
      node.reservationFor = {
        name: x.name,
        rentalCompany: x.operator ? { name: x.operator } : undefined,
      };
      node.pickupTime = x.departure_time;
      node.dropoffTime = x.arrival_time;
      node.pickupLocation = { name: x.from_name, address: x.address };
      node.dropoffLocation = { name: x.to_name };
      break;
    case 'EventReservation':
      node.reservationFor = {
        name: nameOrFallback(x, 'Event'),
        startDate: x.start_time,
        endDate: x.end_time,
        location: { address: x.address, telephone: x.telephone, url: x.website },
      };
      // The times are set on the event and mirrored on the reservation root.
      node.startTime = x.start_time;
      node.endTime = x.end_time;
      break;
  }

  return clean(node) as Record<string, unknown> | undefined;
}
|
||||
|
||||
/**
 * Convert a parsed NuExtract response into schema.org `KiReservation` nodes.
 *
 * Three input shapes are tolerated: an object with a `reservations` array, a
 * bare array, or a single flat object (what the flat template produces). Any
 * other input gives an empty list. Entries that are not objects, or whose type
 * is not recognized, are dropped.
 *
 * @param parsed The JSON-decoded model output (or null when decoding failed).
 * @returns Zero or more reservation nodes, in input order.
 */
export function nuExtractToKiReservations(parsed: unknown): Record<string, unknown>[] {
  const wrapped = (parsed as { reservations?: unknown })?.reservations;

  let entries: unknown[];
  if (Array.isArray(wrapped)) {
    entries = wrapped;
  } else if (Array.isArray(parsed)) {
    entries = parsed;
  } else if (parsed && typeof parsed === 'object') {
    entries = [parsed];
  } else {
    entries = [];
  }

  const nodes: Record<string, unknown>[] = [];
  entries.forEach((entry) => {
    if (!entry || typeof entry !== 'object') return;
    const node = buildNode(entry as Record<string, unknown>);
    if (node) nodes.push(node);
  });
  return nodes;
}
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { LlmExtractionClient, LlmExtractionInput } from '../llm-provider.interface';
|
||||
import { isNuExtractModel, buildNuExtractUserText, nuExtractToKiReservations } from './nuextract';
|
||||
|
||||
// Generous: a local CPU model (Ollama, no GPU) may cold-load several GB and then
|
||||
// take a few minutes on a longer document before the first token.
|
||||
@@ -11,19 +12,25 @@ const MAX_TOKENS = 4096;
|
||||
* which all expose `POST {baseUrl}/chat/completions`. Native binaries (PDF) are
|
||||
* sent as an OpenAI `file` content part; text goes as a text part. Uses the
|
||||
* global fetch (no SDK) to match the codebase's HTTP style.
|
||||
*
|
||||
* A NuExtract model (detected by id) takes a different request shape: the JSON
|
||||
* template inlined in a single user message, no system prompt and no
|
||||
* `response_format` (see ./nuextract.ts) — that's how the fine-tune expects to
|
||||
* be driven; the generic instruct path applies to every other model.
|
||||
*/
|
||||
export class OpenAiCompatibleClient implements LlmExtractionClient {
|
||||
async extract(input: LlmExtractionInput): Promise<Record<string, unknown>[]> {
|
||||
const base = (input.baseUrl ?? 'https://api.openai.com/v1').replace(/\/+$/, '');
|
||||
const url = `${base}/chat/completions`;
|
||||
const nuextract = isNuExtractModel(input.model);
|
||||
|
||||
const userContent: unknown[] = [
|
||||
{ type: 'text', text: input.text ? `${USER_TEXT}\n\n${input.text}` : USER_TEXT },
|
||||
];
|
||||
const userContent: unknown[] = nuextract
|
||||
? [{ type: 'text', text: buildNuExtractUserText(input.text ?? '') }]
|
||||
: [{ type: 'text', text: input.text ? `${USER_TEXT}\n\n${input.text}` : USER_TEXT }];
|
||||
// Only genuine images go natively (as image_url) — OpenAI-compatible servers
|
||||
// (notably Ollama) reject `file`/PDF content parts. PDFs reach this client as
|
||||
// pre-extracted text (see llm-parse.service.ts), never as bytes.
|
||||
if (input.file && input.file.mimeType.startsWith('image/')) {
|
||||
if (!nuextract && input.file && input.file.mimeType.startsWith('image/')) {
|
||||
const b64 = input.file.data.toString('base64');
|
||||
userContent.push({
|
||||
type: 'image_url',
|
||||
@@ -37,14 +44,22 @@ export class OpenAiCompatibleClient implements LlmExtractionClient {
|
||||
// Extraction is a deterministic task — Ollama defaults to 0.7, which makes
|
||||
// small models (NuExtract) drop fields or return empty. Pin to 0.
|
||||
temperature: 0,
|
||||
messages: [
|
||||
{ role: 'system', content: input.prompt },
|
||||
{ role: 'user', content: userContent },
|
||||
],
|
||||
response_format: {
|
||||
type: 'json_schema',
|
||||
json_schema: { name: 'reservations', schema: input.jsonSchema, strict: false },
|
||||
},
|
||||
// NuExtract wants the template (in the user turn) to be the only instruction
|
||||
// — a system prompt or a json_schema grammar derails it.
|
||||
messages: nuextract
|
||||
? [{ role: 'user', content: userContent }]
|
||||
: [
|
||||
{ role: 'system', content: input.prompt },
|
||||
{ role: 'user', content: userContent },
|
||||
],
|
||||
...(nuextract
|
||||
? {}
|
||||
: {
|
||||
response_format: {
|
||||
type: 'json_schema' as const,
|
||||
json_schema: { name: 'reservations', schema: input.jsonSchema, strict: false },
|
||||
},
|
||||
}),
|
||||
};
|
||||
|
||||
const controller = new AbortController();
|
||||
@@ -73,22 +88,31 @@ export class OpenAiCompatibleClient implements LlmExtractionClient {
|
||||
choices?: { message?: { content?: string } }[];
|
||||
};
|
||||
const content = data.choices?.[0]?.message?.content;
|
||||
return parseReservations(content);
|
||||
return nuextract ? parseNuExtract(content) : parseReservations(content);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decode a model reply as JSON, tolerating a surrounding Markdown code fence
 * (an opening fence with or without a `json` tag, and a closing fence).
 *
 * @param content Raw message content; may be missing.
 * @returns The decoded value, or `null` when the content is empty or not valid JSON.
 */
function parseJson(content: string | undefined | null): unknown {
  if (!content) return null;

  let body = content.trim();
  body = body.replace(/^```(?:json)?/i, '');
  body = body.replace(/```$/, '');

  try {
    return JSON.parse(body.trim());
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Turn a raw NuExtract reply into KiReservation nodes: decode the JSON, then
 * map the flat template output. Undecodable content results in an empty list.
 */
function parseNuExtract(content: string | undefined | null): Record<string, unknown>[] {
  const decoded = parseJson(content);
  return nuExtractToKiReservations(decoded);
}
|
||||
|
||||
/** Instruction that opens the user text part on the generic (non-NuExtract) path; the document text, if any, follows it. */
const USER_TEXT = 'Extract every travel reservation from the following document as schema.org JSON-LD.';
|
||||
|
||||
/** Tolerant parse: strip code fences, JSON.parse, pull `reservations`. `[]` on failure. */
|
||||
function parseReservations(content: string | undefined | null): Record<string, unknown>[] {
|
||||
if (!content) return [];
|
||||
const stripped = content.trim().replace(/^```(?:json)?/i, '').replace(/```$/, '').trim();
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = JSON.parse(stripped);
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
const parsed = parseJson(content);
|
||||
if (Array.isArray(parsed)) return parsed as Record<string, unknown>[];
|
||||
if (parsed && typeof parsed === 'object' && Array.isArray((parsed as { reservations?: unknown }).reservations)) {
|
||||
return (parsed as { reservations: Record<string, unknown>[] }).reservations;
|
||||
|
||||
@@ -64,6 +64,53 @@ describe('OpenAiCompatibleClient', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Request shape and result mapping of the OpenAI-compatible client when the
// model id names NuExtract, plus a guard that other models keep the generic shape.
describe('OpenAiCompatibleClient — NuExtract path', () => {
  it('inlines the template in one user message (no system, no response_format) and maps the flat result', async () => {
    // What the (mocked) model returns: one flat hotel entry.
    const modelReply = {
      reservations: [
        {
          type: 'hotel',
          name: 'B&B Hotel',
          booking_reference: '733',
          checkin_time: '2026-05-01T15:00:00',
          checkout_time: '2026-05-02T12:00:00',
        },
      ],
    };
    const fetchFn = mockFetch(() =>
      jsonResponse({ choices: [{ message: { content: JSON.stringify(modelReply) } }] }),
    );

    const client = new OpenAiCompatibleClient();
    const out = await client.extract({
      ...baseInput,
      model: 'hf.co/numind/NuExtract-2.0-2B-GGUF:latest',
      text: 'Hotel doc',
    });

    // The flat entry comes back as a schema.org node.
    expect(out).toEqual([
      {
        '@type': 'LodgingReservation',
        reservationNumber: '733',
        reservationFor: { name: 'B&B Hotel' },
        checkinTime: '2026-05-01T15:00:00',
        checkoutTime: '2026-05-02T12:00:00',
      },
    ]);

    // The request carries a single user turn holding template + document.
    const body = JSON.parse((fetchFn.mock.calls[0][1] as RequestInit).body as string);
    expect(body.messages).toHaveLength(1);
    expect(body.messages[0].role).toBe('user');
    const userText = body.messages[0].content[0].text;
    expect(userText.startsWith('# Template:')).toBe(true);
    expect(userText.endsWith('Hotel doc')).toBe(true);
    expect(body.temperature).toBe(0);
    expect(body.response_format).toBeUndefined();
  });

  it('keeps the system prompt and response_format for non-NuExtract models', async () => {
    const emptyReply = { choices: [{ message: { content: '{"reservations":[]}' } }] };
    const fetchFn = mockFetch(() => jsonResponse(emptyReply));

    await new OpenAiCompatibleClient().extract({ ...baseInput, model: 'qwen2.5:7b' });

    const body = JSON.parse((fetchFn.mock.calls[0][1] as RequestInit).body as string);
    expect(body.messages[0].role).toBe('system');
    expect(body.response_format).toBeDefined();
  });
});
|
||||
|
||||
describe('AnthropicClient', () => {
|
||||
it('forces the emit_reservations tool and reads its input', async () => {
|
||||
const fetchFn = mockFetch(() =>
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
isNuExtractModel,
|
||||
buildNuExtractUserText,
|
||||
nuExtractToKiReservations,
|
||||
NUEXTRACT_TEMPLATE,
|
||||
} from '../../../../src/nest/llm-parse/clients/nuextract';
|
||||
|
||||
// Model-id detection: NuExtract ids in any casing match, everything else does not.
describe('isNuExtractModel', () => {
  it('matches NuExtract ids case-insensitively', () => {
    const nuExtractIds = [
      'hf.co/numind/NuExtract-2.0-2B-GGUF:latest',
      'hf.co/numind/NuExtract3-GGUF:Q4_K_M',
      'nuextract',
    ];
    for (const id of nuExtractIds) {
      expect(isNuExtractModel(id)).toBe(true);
    }
  });

  it('does not match generic instruct models', () => {
    const otherIds = ['qwen2.5:7b', 'gpt-4o', undefined];
    for (const id of otherIds) {
      expect(isNuExtractModel(id)).toBe(false);
    }
  });
});
|
||||
|
||||
// Prompt layout: header first, pretty-printed template in the middle, document last.
describe('buildNuExtractUserText', () => {
  it('inlines the template under a "# Template:" header followed by the document', () => {
    const documentText = 'Hotel confirmation 123';
    const prettyTemplate = JSON.stringify(NUEXTRACT_TEMPLATE, null, 4);

    const text = buildNuExtractUserText(documentText);

    expect(text.startsWith('# Template:\n')).toBe(true);
    expect(text).toContain('"verbatim-string"');
    expect(text).toContain(prettyTemplate);
    expect(text.endsWith(documentText)).toBe(true);
  });
});
|
||||
|
||||
// Mapping of flat NuExtract output to schema.org nodes, one scenario per
// reservation type plus money parsing, the name fallback and input-shape tolerance.
describe('nuExtractToKiReservations', () => {
  it('maps a flat flight into a schema.org FlightReservation with from/to airports', () => {
    const flatFlight = {
      type: 'flight',
      name: 'LH 198',
      booking_reference: '7XK2QP',
      operator: 'Lufthansa',
      vehicle_number: 'LH198',
      from_name: 'Berlin Brandenburg (BER)',
      from_code: 'BER',
      to_name: 'Frankfurt am Main (FRA)',
      to_code: 'FRA',
      departure_time: '2026-07-12T08:35:00',
      arrival_time: '2026-07-12T09:50:00',
      pickup_location: null,
      seat: '14A',
      travel_class: 'Economy',
      platform: null,
      price: 149,
      currency: 'EUR',
    };

    const out = nuExtractToKiReservations({ reservations: [flatFlight] });

    expect(out).toEqual([
      {
        '@type': 'FlightReservation',
        reservationNumber: '7XK2QP',
        seat: '14A',
        class: 'Economy',
        price: 149,
        priceCurrency: 'EUR',
        reservationFor: {
          flightNumber: 'LH198',
          airline: { name: 'Lufthansa' },
          departureAirport: { iataCode: 'BER', name: 'Berlin Brandenburg (BER)' },
          arrivalAirport: { iataCode: 'FRA', name: 'Frankfurt am Main (FRA)' },
          departureTime: '2026-07-12T08:35:00',
          arrivalTime: '2026-07-12T09:50:00',
        },
      },
    ]);
  });

  it('maps a hotel with check-in/out at the reservation root', () => {
    const flatHotel = {
      type: 'hotel',
      name: 'B&B Hotel Berlin-Airport',
      booking_reference: '73365505188894',
      address: 'Bertolt-Brecht-Allee 12, 12529 Schoenefeld',
      checkin_time: '2026-05-01T15:00:00',
      checkout_time: '2026-05-02T12:00:00',
      from_name: null,
      price: 89,
      currency: 'EUR',
    };

    const [node] = nuExtractToKiReservations({ reservations: [flatHotel] });

    expect(node).toEqual({
      '@type': 'LodgingReservation',
      reservationNumber: '73365505188894',
      price: 89,
      priceCurrency: 'EUR',
      reservationFor: {
        name: 'B&B Hotel Berlin-Airport',
        address: 'Bertolt-Brecht-Allee 12, 12529 Schoenefeld',
      },
      checkinTime: '2026-05-01T15:00:00',
      checkoutTime: '2026-05-02T12:00:00',
    });
  });

  it('maps a rental car — pickup/return ride the from/to fields, money is parsed', () => {
    const flatCar = {
      type: 'car',
      name: 'VW Golf',
      operator: 'SICILY BY CAR',
      booking_reference: 'CAR1',
      from_name: 'Catania Airport',
      to_name: 'Palermo Airport',
      departure_time: '2026-12-24T10:00:00',
      arrival_time: '2026-12-29T10:00:00',
      address: 'Via Roma 1',
      price: '€215,50',
      currency: '€',
    };

    // A bare array (no wrapper object) is accepted too.
    const [node] = nuExtractToKiReservations([flatCar]);

    expect(node).toEqual({
      '@type': 'RentalCarReservation',
      reservationNumber: 'CAR1',
      price: 215.5,
      priceCurrency: 'EUR',
      reservationFor: { name: 'VW Golf', rentalCompany: { name: 'SICILY BY CAR' } },
      pickupTime: '2026-12-24T10:00:00',
      dropoffTime: '2026-12-29T10:00:00',
      pickupLocation: { name: 'Catania Airport', address: 'Via Roma 1' },
      dropoffLocation: { name: 'Palermo Airport' },
    });
  });

  it('parses localized money strings and currency symbols', () => {
    const cases = [
      { name: 'X', price: '1.580,22 €', amount: 1580.22, currency: 'EUR' },
      { name: 'Y', price: '$1,580.22', amount: 1580.22, currency: 'USD' },
      { name: 'Z', price: 'EUR 89,00', amount: 89, currency: 'EUR' },
    ];

    for (const { name, price, amount, currency } of cases) {
      const [node] = nuExtractToKiReservations({ type: 'hotel', name, price });
      expect(node.price).toBe(amount);
      expect(node.priceCurrency).toBe(currency);
    }
  });

  it('falls back to the address instead of dropping a nameless lodging', () => {
    const namelessHotel = {
      type: 'hotel',
      booking_reference: 'HMHJ9RTEEK',
      address: "Via Aldo Moro, 47 n. 15, Quarto d'Altino",
    };

    const [node] = nuExtractToKiReservations(namelessHotel);

    expect(node['@type']).toBe('LodgingReservation');
    const venue = node.reservationFor as Record<string, unknown>;
    expect(venue.name).toBe('Via Aldo Moro');
  });

  it('accepts a bare object and drops unknown types', () => {
    const bareFlight = { type: 'flight', from_name: 'A', to_name: 'B' };
    expect(nuExtractToKiReservations(bareFlight)).toEqual([
      {
        '@type': 'FlightReservation',
        reservationFor: {
          departureAirport: { name: 'A' },
          arrivalAirport: { name: 'B' },
        },
      },
    ]);

    const unknownType = { reservations: [{ type: 'spaceship' }] };
    expect(nuExtractToKiReservations(unknownType)).toEqual([]);
    expect(nuExtractToKiReservations(null)).toEqual([]);
  });
});
|
||||
Reference in New Issue
Block a user