refactor(extract): dedupe currency/day helpers, drop redundant casts, support JPY vouchers

Code-audit clean-ups: share one normCurrency between the router and the templates, lift the duplicated nearest-day resolver into formatters.resolveDayId, drop two needless as-unknown-as casts at the fillBookingWideFields call sites, restore routeExtraction's doc comment, and give the broker template readable names. Plus recognise ¥/JPY and fall back to a standalone symbol amount, so a Klook-style voucher whose price sits far from any label still yields a cost.
This commit is contained in:
Maurice
2026-06-26 10:41:29 +02:00
parent 6f21eba216
commit 801bf0539f
6 changed files with 91 additions and 37 deletions
+25 -1
View File
@@ -1,5 +1,29 @@
import { describe, it, expect } from 'vitest'
import { splitReservationDateTime } from './formatters'
import { splitReservationDateTime, resolveDayId } from './formatters'
import type { Day } from '../types'
// Fixture: two consecutive days followed by a later one, so a date can fall in the gap
// between existing days and exercise the nearest-day fallback.
const days = [
{ id: 10, date: '2026-05-03' },
{ id: 11, date: '2026-05-04' },
{ id: 12, date: '2026-05-22' },
] as Day[]
// Pins the contract of resolveDayId: exact match, timestamp input, nearest-day fallback,
// and the '' sentinel for unusable input.
describe('resolveDayId', () => {
it('returns the exact-match day id', () => {
expect(resolveDayId(days, '2026-05-04')).toBe(11)
})
it('accepts a full ISO timestamp', () => {
// Only the date part should matter; the time of day is irrelevant to the match.
expect(resolveDayId(days, '2026-05-22T13:30:00')).toBe(12)
})
it('falls back to the nearest day when there is no exact match', () => {
// 2026-05-05 is one day after id 11 and seventeen days before id 12.
expect(resolveDayId(days, '2026-05-05')).toBe(11)
})
it('returns "" for a missing/invalid date or no days', () => {
expect(resolveDayId(days, null)).toBe('')
expect(resolveDayId(days, 'not a date')).toBe('')
expect(resolveDayId([], '2026-05-04')).toBe('')
})
})
describe('splitReservationDateTime', () => {
it('parses full ISO datetime', () => {
+22 -1
View File
@@ -1,4 +1,4 @@
import type { AssignmentsMap } from '../types'
import type { AssignmentsMap, Day } from '../types'
// Collapses verbose Nominatim display_name strings (e.g. "Place, 1, Road, Neighbourhood,
// City, County, State, Country, Postcode, Country") into "Place, Postcode, Country".
@@ -129,6 +129,27 @@ export function splitReservationDateTime(value?: string | null): { date: string
return { date: null, time: null }
}
/**
 * Maps a date onto one of the trip's days and returns that day's id.
 *
 * Only the first ten characters of `value` are looked at, so a plain `YYYY-MM-DD` and a
 * longer ISO timestamp are both accepted. A day whose `date` equals that prefix wins
 * outright; otherwise the day closest in time is picked (the earliest-listed one on a
 * tie), which keeps a booking dated outside the trip attached to some day.
 *
 * @param days - The trip's days; entries without a `date` are skipped by the nearest-day search.
 * @param value - The date or timestamp to place; may be null or undefined.
 * @returns The chosen day's id, or `''` when `value` is missing or not `YYYY-MM-DD`-shaped,
 *   when `days` is empty, or when no day has a comparable date — callers read `''` as
 *   "no day selected".
 */
export function resolveDayId(days: Day[], value: string | null | undefined): Day['id'] | '' {
  const isoDay = value ? String(value).slice(0, 10) : ''
  if (days.length === 0 || !/^\d{4}-\d{2}-\d{2}$/.test(isoDay)) return ''

  const sameDay = days.find(day => day.date === isoDay)
  if (sameDay) return sameDay.id

  const wanted = new Date(isoDay).getTime()
  const nearest = days.reduce<{ id: Day['id'] | ''; gap: number }>(
    (closest, day) => {
      if (!day.date) return closest
      const gap = Math.abs(new Date(day.date).getTime() - wanted)
      // Strict comparison: the first day at the smallest distance is kept, and a NaN
      // gap (unparseable date) never replaces the current candidate.
      return gap < closest.gap ? { id: day.id, gap } : closest
    },
    { id: '', gap: Infinity },
  )
  return nearest.id
}
export function dayTotalCost(dayId: number, assignments: AssignmentsMap, currency: string): string | null {
const da = assignments[String(dayId)] || []
const total = da.reduce((s, a) => s + (parseFloat(String(a.place?.price ?? '')) || 0), 0)
@@ -146,6 +146,7 @@ function parseCurrency(...candidates: unknown[]): string | undefined {
if (s.includes('€') || /\bEUR\b/.test(s)) return 'EUR';
if (s.includes('£') || /\bGBP\b/.test(s)) return 'GBP';
if (s.includes('$') || /\bUSD\b/.test(s)) return 'USD';
if (s.includes('¥') || /\bJPY\b/.test(s)) return 'JPY';
const iso = s.match(/\b([A-Z]{3})\b/);
if (iso) return iso[1];
}
@@ -19,7 +19,7 @@ import type { KiReservation } from '../../booking-import/kitinerary.types';
import { nuExtractToKiReservations } from '../clients/nuextract';
import { FLAT_SCHEMA_BY_TYPE, FLIGHTS_ARRAY_SCHEMA, UNION_SINGLE_SCHEMA, type FlatType } from './flat-schemas';
import { extractEnforced } from './ollama-format.client';
import { matchVendorTemplate } from './vendor-templates';
import { matchVendorTemplate, normCurrency } from './vendor-templates';
import type { FlatLike } from './validate';
export interface RouterContext {
@@ -84,23 +84,19 @@ export function extractBookingRef(text: string): string | undefined {
return m?.[1];
}
/**
 * Translate a currency token into its ISO 4217 code.
 *
 * @param s - Text containing a currency symbol, or a bare three-letter code in any case.
 * @returns 'EUR', 'USD' or 'GBP' when the matching symbol or code is found (checked in
 *   that order); the upper-cased input when it is exactly three ASCII letters; otherwise
 *   undefined.
 */
function normCurrency(s: string): string | undefined {
  const token = s.toUpperCase();
  const known: ReadonlyArray<readonly [string, string]> = [
    ['€', 'EUR'],
    ['$', 'USD'],
    ['£', 'GBP'],
  ];
  for (const [sign, code] of known) {
    if (token.includes(sign) || token === code) return code;
  }
  return /^[A-Z]{3}$/.test(token) ? token : undefined;
}
/**
 * The booking total, pulled deterministically (raw amount string + ISO currency).
 *
 * Two patterns are tried in order: a total introduced by one of the German/English labels,
 * then — only if that finds nothing — the first line that begins with a currency symbol
 * followed by a number.
 *
 * @param text - The extracted document text to search.
 * @returns `price` as the matched amount with currency symbols and whitespace removed
 *   (thousands/decimal separators are left untouched) and `currency` as whatever
 *   normCurrency makes of the matched code or symbol (undefined when it recognises none);
 *   null when neither pattern matches.
 */
export function extractTotalPrice(text: string): { price: string; currency?: string } | null {
const m = text.match(
/(?:Gesamtpreis|Gesamtbetrag|Gesamtsumme|Total(?:\s*(?:price|amount))?|Amount|Summe|Betrag)\s*:?\s*([€$£]?\s*\d[\d.,]*)\s*(EUR|USD|GBP|CHF|€|\$|£)?/i,
// Reduces a matched amount to its digits and separators by dropping symbols and whitespace.
const strip = (s: string) => s.replace(/[€$£¥\s]/g, '');
// A labeled total: "Gesamtpreis: 1.234,56 €", "Total Amount 99 USD", "Bezahlter Betrag 651,86 €".
// Group 1 is the amount (with an optional leading symbol), group 2 an optional trailing
// code or symbol.
const labeled = text.match(
/(?:Gesamtpreis|Gesamtbetrag|Gesamtsumme|Total(?:\s*(?:price|amount))?|Amount|Summe|Betrag)\s*:?\s*([€$£¥]?\s*\d[\d.,]*)\s*(EUR|USD|GBP|CHF|JPY|€|\$|£|¥)?/i,
);
if (!m) return null;
return { price: m[1].replace(/[€$£\s]/g, ''), currency: normCurrency(m[2] ?? m[1]) };
// Prefer the trailing code/symbol; without one, fall back to a symbol leading the amount.
if (labeled) return { price: strip(labeled[1]), currency: normCurrency(labeled[2] ?? labeled[1]) };
// Fallback: a standalone amount carrying a currency symbol on its own line (e.g. a voucher's
// "¥9,400") — the price sits far from any label the pattern above can anchor to.
// The `m` flag anchors `^` at every line start, so the first such line in the text wins.
const symbol = text.match(/^\s*([€$£¥]\s?\d[\d.,]*)\b/m);
if (symbol) return { price: strip(symbol[1]), currency: normCurrency(symbol[1]) };
return null;
}
/**
@@ -177,10 +173,6 @@ async function extractSingle(text: string, ctx: RouterContext): Promise<FlatLike
return fixArrivalDate(normalizeDates({ ...out, type }));
}
/**
* Run the router on extracted document text and return schema.org KiReservation nodes.
* Returns `[]` (never throws for content reasons) so the caller degrades gracefully.
*/
/**
* Schicht 2 — fill the booking-wide fields the per-reservation extraction doesn't carry:
* the confirmation/PNR and the booking total. Applied to BOTH the deterministic vendor
@@ -188,7 +180,7 @@ async function extractSingle(text: string, ctx: RouterContext): Promise<FlatLike
* whose narrow ref/price regex missed still gets the broad doc-wide deterministic value.
* Never overrides a value the source already provided.
*/
function fillBookingWideFields(flats: Array<Record<string, unknown>>, text: string): void {
function fillBookingWideFields(flats: Record<string, unknown>[], text: string): void {
const ref = extractBookingRef(text);
const total = extractTotalPrice(text);
// A small model sometimes emits an empty string for a price it didn't find, which is
@@ -204,6 +196,10 @@ function fillBookingWideFields(flats: Array<Record<string, unknown>>, text: stri
});
}
/**
* Run the router on extracted document text and return schema.org KiReservation nodes.
* Returns `[]` (never throws for content reasons) so the caller degrades gracefully.
*/
export async function routeExtraction(text: string, ctx: RouterContext): Promise<{ kiItems: KiReservation[]; warnings: string[] }> {
const warnings: string[] = [];
@@ -212,7 +208,7 @@ export async function routeExtraction(text: string, ctx: RouterContext): Promise
// deterministic extractor would have found them.
const vendor = matchVendorTemplate(text);
if (vendor && vendor.length > 0) {
fillBookingWideFields(vendor as unknown as Array<Record<string, unknown>>, text);
fillBookingWideFields(vendor, text);
return { kiItems: nuExtractToKiReservations(vendor) as unknown as KiReservation[], warnings };
}
@@ -225,7 +221,7 @@ export async function routeExtraction(text: string, ctx: RouterContext): Promise
}
// Schicht 2 — deterministic booking-wide fields the per-call schema doesn't carry.
fillBookingWideFields(flats as unknown as Array<Record<string, unknown>>, text);
fillBookingWideFields(flats, text);
const kiItems = nuExtractToKiReservations(flats as unknown as Record<string, unknown>[]) as unknown as KiReservation[];
return { kiItems, warnings };
@@ -88,14 +88,19 @@ function enDateTime(text: string): string | null {
return `${date}T${String(h).padStart(2, '0')}:${m[5]}:00`;
}
/** Symbol/code → ISO 4217 (defaults to EUR for the EU-centric broker vouchers). */
function moneyCurrency(token: string | undefined): string {
if (!token) return 'EUR';
/**
 * Symbol/code → ISO 4217, or undefined when none is recognised.
 *
 * @param token - Text holding a currency symbol, or a bare three-letter code in any case.
 * @returns 'EUR', 'USD', 'GBP' or 'JPY' when the corresponding symbol occurs in `token`
 *   (checked in that order); the upper-cased token when it is exactly three ASCII letters;
 *   otherwise undefined.
 */
export function normCurrency(token: string): string | undefined {
const u = token.toUpperCase();
// Symbols are matched by substring, so a whole amount such as "¥9,400" can be passed in.
if (u.includes('€')) return 'EUR';
if (u.includes('$')) return 'USD';
if (u.includes('£')) return 'GBP';
return /^[A-Z]{3}$/.test(u) ? u : 'EUR';
// NOTE(review): '¥' is also written for the Chinese yuan; it is always read as JPY here —
// confirm that is acceptable for non-Japanese vouchers.
if (u.includes('¥')) return 'JPY';
// Any other three-letter token is passed through upper-cased; it is not checked against
// the list of real ISO 4217 codes.
return /^[A-Z]{3}$/.test(u) ? u : undefined;
}
/**
 * Resolve a currency symbol/code to ISO 4217 via normCurrency, defaulting to 'EUR' when
 * the token is missing or not recognised (the broker vouchers are EU-centric).
 */
function moneyCurrency(token: string | undefined): string {
  const recognised = normCurrency(token ?? '');
  return recognised ?? 'EUR';
}
/**
@@ -180,11 +185,11 @@ const brokerRental: VendorTemplate = {
const ref = t.match(/Reservation\s*No\.?:?\s*([A-Z0-9]{5,})/i)?.[1];
const block = (label: RegExp) =>
t.match(new RegExp(label.source + String.raw`\s*\n([^\n]+)\n([A-Za-z]{3,}\.?\s+\d{1,2},?\s+\d{4}[^\n]*)`, 'i'));
const pu = block(/PICK-?UP DETAILS/);
const dof = block(/DROP-?OFF DETAILS/);
const puTime = pu ? enDateTime(pu[2]) : null;
const doTime = dof ? enDateTime(dof[2]) : null;
if (!ref || !pu || !dof || !puTime || !doTime) return [];
const pickup = block(/PICK-?UP DETAILS/);
const dropoff = block(/DROP-?OFF DETAILS/);
const pickupTime = pickup ? enDateTime(pickup[2]) : null;
const dropoffTime = dropoff ? enDateTime(dropoff[2]) : null;
if (!ref || !pickup || !dropoff || !pickupTime || !dropoffTime) return [];
const company = t
.match(/SUPPLIER DETAILS\s*\n([^\n]+?)(?:\s+Supplier Reference|\n|$)/i)?.[1]
?.trim()
@@ -200,10 +205,10 @@ const brokerRental: VendorTemplate = {
type: 'car',
...(company ? { operator: company } : {}),
booking_reference: ref,
from_name: pu[1].trim(),
to_name: dof[1].trim(),
departure_time: puTime,
arrival_time: doTime,
from_name: pickup[1].trim(),
to_name: dropoff[1].trim(),
departure_time: pickupTime,
arrival_time: dropoffTime,
...(price ? { price, currency: moneyCurrency(priceM![1] ?? priceM![4]) } : {}),
},
];
@@ -150,4 +150,11 @@ describe('extractTotalPrice', () => {
it('reads an Airbnb "Bezahlter Betrag"', () => {
expect(extractTotalPrice(AIRBNB)).toEqual({ price: '651,86', currency: 'EUR' });
});
it('falls back to a standalone ¥ voucher price (JPY) with no nearby label', () => {
const voucher = 'Price (consumption tax included)\n金額(消費税込)\nPark Admission Date\n¥9,400\nAdult\n1-Day Passport';
expect(extractTotalPrice(voucher)).toEqual({ price: '9,400', currency: 'JPY' });
});
it('returns null when there is neither a labeled nor a symbol amount', () => {
expect(extractTotalPrice('Just some terms and conditions, no price here.')).toBeNull();
});
});