From 801bf0539f7fde7be8d1754b7bdf68707281c97c Mon Sep 17 00:00:00 2001 From: Maurice Date: Fri, 26 Jun 2026 10:41:29 +0200 Subject: [PATCH] refactor(extract): dedupe currency/day helpers, drop redundant casts, support JPY vouchers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code-audit clean-ups: share one normCurrency between the router and the templates, lift the duplicated nearest-day resolver into formatters.resolveDayId, drop two needless as-unknown-as casts at the fillBookingWideFields call sites, restore routeExtraction's doc comment, and give the broker template readable names. Plus recognise ¥/JPY and fall back to a standalone symbol amount, so a Klook-style voucher whose price sits far from any label still yields a cost. --- client/src/utils/formatters.test.ts | 26 +++++++++++- client/src/utils/formatters.ts | 23 ++++++++++- .../src/nest/llm-parse/clients/nuextract.ts | 1 + .../llm-parse/router/extraction-router.ts | 40 +++++++++---------- .../nest/llm-parse/router/vendor-templates.ts | 31 ++++++++------ .../nest/llm-parse/vendor-templates.test.ts | 7 ++++ 6 files changed, 91 insertions(+), 37 deletions(-) diff --git a/client/src/utils/formatters.test.ts b/client/src/utils/formatters.test.ts index 08c7ab73..1e70e0cd 100644 --- a/client/src/utils/formatters.test.ts +++ b/client/src/utils/formatters.test.ts @@ -1,5 +1,29 @@ import { describe, it, expect } from 'vitest' -import { splitReservationDateTime } from './formatters' +import { splitReservationDateTime, resolveDayId } from './formatters' +import type { Day } from '../types' + +const days = [ + { id: 10, date: '2026-05-03' }, + { id: 11, date: '2026-05-04' }, + { id: 12, date: '2026-05-22' }, +] as Day[] + +describe('resolveDayId', () => { + it('returns the exact-match day id', () => { + expect(resolveDayId(days, '2026-05-04')).toBe(11) + }) + it('accepts a full ISO timestamp', () => { + expect(resolveDayId(days, '2026-05-22T13:30:00')).toBe(12) + }) + 
it('falls back to the nearest day when there is no exact match', () => { + expect(resolveDayId(days, '2026-05-05')).toBe(11) + }) + it('returns "" for a missing/invalid date or no days', () => { + expect(resolveDayId(days, null)).toBe('') + expect(resolveDayId(days, 'not a date')).toBe('') + expect(resolveDayId([], '2026-05-04')).toBe('') + }) +}) describe('splitReservationDateTime', () => { it('parses full ISO datetime', () => { diff --git a/client/src/utils/formatters.ts b/client/src/utils/formatters.ts index 94bfe81c..441ab116 100644 --- a/client/src/utils/formatters.ts +++ b/client/src/utils/formatters.ts @@ -1,4 +1,4 @@ -import type { AssignmentsMap } from '../types' +import type { AssignmentsMap, Day } from '../types' // Collapses verbose Nominatim display_name strings (e.g. "Place, 1, Road, Neighbourhood, // City, County, State, Country, Postcode, Country") into "Place, Postcode, Country". @@ -129,6 +129,27 @@ export function splitReservationDateTime(value?: string | null): { date: string return { date: null, time: null } } +/** + * Resolve a date (YYYY-MM-DD or an ISO timestamp) to a trip day id: exact match, else the + * nearest day so an out-of-range booking still lands on one. Returns '' when there is no + * usable date or the trip has no days — callers read that as "no day selected". + */ +export function resolveDayId(days: Day[], value: string | null | undefined): Day['id'] | '' { + const date = value ? 
String(value).slice(0, 10) : '' + if (!/^\d{4}-\d{2}-\d{2}$/.test(date) || days.length === 0) return '' + const exact = days.find(d => d.date === date) + if (exact) return exact.id + const target = new Date(date).getTime() + let best: Day['id'] | '' = '' + let bestDiff = Infinity + for (const d of days) { + if (!d.date) continue + const diff = Math.abs(new Date(d.date).getTime() - target) + if (diff < bestDiff) { bestDiff = diff; best = d.id } + } + return best +} + export function dayTotalCost(dayId: number, assignments: AssignmentsMap, currency: string): string | null { const da = assignments[String(dayId)] || [] const total = da.reduce((s, a) => s + (parseFloat(String(a.place?.price ?? '')) || 0), 0) diff --git a/server/src/nest/llm-parse/clients/nuextract.ts b/server/src/nest/llm-parse/clients/nuextract.ts index 4b4ec466..503f9187 100644 --- a/server/src/nest/llm-parse/clients/nuextract.ts +++ b/server/src/nest/llm-parse/clients/nuextract.ts @@ -146,6 +146,7 @@ function parseCurrency(...candidates: unknown[]): string | undefined { if (s.includes('€') || /\bEUR\b/.test(s)) return 'EUR'; if (s.includes('£') || /\bGBP\b/.test(s)) return 'GBP'; if (s.includes('$') || /\bUSD\b/.test(s)) return 'USD'; + if (s.includes('¥') || /\bJPY\b/.test(s)) return 'JPY'; const iso = s.match(/\b([A-Z]{3})\b/); if (iso) return iso[1]; } diff --git a/server/src/nest/llm-parse/router/extraction-router.ts b/server/src/nest/llm-parse/router/extraction-router.ts index 387c81f2..21422dcd 100644 --- a/server/src/nest/llm-parse/router/extraction-router.ts +++ b/server/src/nest/llm-parse/router/extraction-router.ts @@ -19,7 +19,7 @@ import type { KiReservation } from '../../booking-import/kitinerary.types'; import { nuExtractToKiReservations } from '../clients/nuextract'; import { FLAT_SCHEMA_BY_TYPE, FLIGHTS_ARRAY_SCHEMA, UNION_SINGLE_SCHEMA, type FlatType } from './flat-schemas'; import { extractEnforced } from './ollama-format.client'; -import { matchVendorTemplate } from 
'./vendor-templates'; +import { matchVendorTemplate, normCurrency } from './vendor-templates'; import type { FlatLike } from './validate'; export interface RouterContext { @@ -84,23 +84,19 @@ export function extractBookingRef(text: string): string | undefined { return m?.[1]; } -/** Currency symbol/code → ISO 4217. */ -function normCurrency(s: string): string | undefined { - const u = s.toUpperCase(); - if (u.includes('€') || u === 'EUR') return 'EUR'; - if (u.includes('$') || u === 'USD') return 'USD'; - if (u.includes('£') || u === 'GBP') return 'GBP'; - if (/^[A-Z]{3}$/.test(u)) return u; - return undefined; -} - /** The booking total, pulled deterministically (raw amount string + ISO currency). */ export function extractTotalPrice(text: string): { price: string; currency?: string } | null { - const m = text.match( - /(?:Gesamtpreis|Gesamtbetrag|Gesamtsumme|Total(?:\s*(?:price|amount))?|Amount|Summe|Betrag)\s*:?\s*([€$£]?\s*\d[\d.,]*)\s*(EUR|USD|GBP|CHF|€|\$|£)?/i, + const strip = (s: string) => s.replace(/[€$£¥\s]/g, ''); + // A labeled total: "Gesamtpreis: 1.234,56 €", "Total Amount 99 USD", "Bezahlter Betrag 651,86 €". + const labeled = text.match( + /(?:Gesamtpreis|Gesamtbetrag|Gesamtsumme|Total(?:\s*(?:price|amount))?|Amount|Summe|Betrag)\s*:?\s*([€$£¥]?\s*\d[\d.,]*)\s*(EUR|USD|GBP|CHF|JPY|€|\$|£|¥)?/i, ); - if (!m) return null; - return { price: m[1].replace(/[€$£\s]/g, ''), currency: normCurrency(m[2] ?? m[1]) }; + if (labeled) return { price: strip(labeled[1]), currency: normCurrency(labeled[2] ?? labeled[1]) }; + // Fallback: a standalone amount carrying a currency symbol on its own line (e.g. a voucher's + // "¥9,400") — the price sits far from any label the pattern above can anchor to. 
+ const symbol = text.match(/^\s*([€$£¥]\s?\d[\d.,]*)\b/m); + if (symbol) return { price: strip(symbol[1]), currency: normCurrency(symbol[1]) }; + return null; } /** @@ -177,10 +173,6 @@ async function extractSingle(text: string, ctx: RouterContext): Promise>, text: string): void { +function fillBookingWideFields(flats: Record[], text: string): void { const ref = extractBookingRef(text); const total = extractTotalPrice(text); // A small model sometimes emits an empty string for a price it didn't find, which is @@ -204,6 +196,10 @@ function fillBookingWideFields(flats: Array>, text: stri }); } +/** + * Run the router on extracted document text and return schema.org KiReservation nodes. + * Resolves to `{ kiItems, warnings }`; `kiItems` is `[]` (never throws for content reasons) so the caller degrades gracefully. + */ export async function routeExtraction(text: string, ctx: RouterContext): Promise<{ kiItems: KiReservation[]; warnings: string[] }> { const warnings: string[] = []; @@ -212,7 +208,7 @@ export async function routeExtraction(text: string, ctx: RouterContext): Promise // deterministic extractor would have found them. const vendor = matchVendorTemplate(text); if (vendor && vendor.length > 0) { - fillBookingWideFields(vendor as unknown as Array>, text); + fillBookingWideFields(vendor, text); return { kiItems: nuExtractToKiReservations(vendor) as unknown as KiReservation[], warnings }; } @@ -225,7 +221,7 @@ export async function routeExtraction(text: string, ctx: RouterContext): Promise } // Schicht 2 — deterministic booking-wide fields the per-call schema doesn't carry. 
- fillBookingWideFields(flats as unknown as Array>, text); + fillBookingWideFields(flats, text); const kiItems = nuExtractToKiReservations(flats as unknown as Record[]) as unknown as KiReservation[]; return { kiItems, warnings }; diff --git a/server/src/nest/llm-parse/router/vendor-templates.ts b/server/src/nest/llm-parse/router/vendor-templates.ts index 4178bad6..c6d8f16e 100644 --- a/server/src/nest/llm-parse/router/vendor-templates.ts +++ b/server/src/nest/llm-parse/router/vendor-templates.ts @@ -88,14 +88,19 @@ function enDateTime(text: string): string | null { return `${date}T${String(h).padStart(2, '0')}:${m[5]}:00`; } -/** Symbol/code → ISO 4217 (defaults to EUR for the EU-centric broker vouchers). */ -function moneyCurrency(token: string | undefined): string { - if (!token) return 'EUR'; +/** Symbol/code → ISO 4217, or undefined when none is recognised. */ +export function normCurrency(token: string): string | undefined { const u = token.toUpperCase(); if (u.includes('€')) return 'EUR'; if (u.includes('$')) return 'USD'; if (u.includes('£')) return 'GBP'; - return /^[A-Z]{3}$/.test(u) ? u : 'EUR'; + if (u.includes('¥')) return 'JPY'; + return /^[A-Z]{3}$/.test(u) ? u : undefined; +} + +/** Same, but defaults to EUR for the EU-centric broker vouchers. */ +function moneyCurrency(token: string | undefined): string { + return normCurrency(token ?? '') ?? 'EUR'; } /** @@ -180,11 +185,11 @@ const brokerRental: VendorTemplate = { const ref = t.match(/Reservation\s*No\.?:?\s*([A-Z0-9]{5,})/i)?.[1]; const block = (label: RegExp) => t.match(new RegExp(label.source + String.raw`\s*\n([^\n]+)\n([A-Za-z]{3,}\.?\s+\d{1,2},?\s+\d{4}[^\n]*)`, 'i')); - const pu = block(/PICK-?UP DETAILS/); - const dof = block(/DROP-?OFF DETAILS/); - const puTime = pu ? enDateTime(pu[2]) : null; - const doTime = dof ? 
enDateTime(dof[2]) : null; - if (!ref || !pu || !dof || !puTime || !doTime) return []; + const pickup = block(/PICK-?UP DETAILS/); + const dropoff = block(/DROP-?OFF DETAILS/); + const pickupTime = pickup ? enDateTime(pickup[2]) : null; + const dropoffTime = dropoff ? enDateTime(dropoff[2]) : null; + if (!ref || !pickup || !dropoff || !pickupTime || !dropoffTime) return []; const company = t .match(/SUPPLIER DETAILS\s*\n([^\n]+?)(?:\s+Supplier Reference|\n|$)/i)?.[1] ?.trim() @@ -200,10 +205,10 @@ const brokerRental: VendorTemplate = { type: 'car', ...(company ? { operator: company } : {}), booking_reference: ref, - from_name: pu[1].trim(), - to_name: dof[1].trim(), - departure_time: puTime, - arrival_time: doTime, + from_name: pickup[1].trim(), + to_name: dropoff[1].trim(), + departure_time: pickupTime, + arrival_time: dropoffTime, ...(price ? { price, currency: moneyCurrency(priceM![1] ?? priceM![4]) } : {}), }, ]; diff --git a/server/tests/unit/nest/llm-parse/vendor-templates.test.ts b/server/tests/unit/nest/llm-parse/vendor-templates.test.ts index 6aff5019..05cce1aa 100644 --- a/server/tests/unit/nest/llm-parse/vendor-templates.test.ts +++ b/server/tests/unit/nest/llm-parse/vendor-templates.test.ts @@ -150,4 +150,11 @@ describe('extractTotalPrice', () => { it('reads an Airbnb "Bezahlter Betrag"', () => { expect(extractTotalPrice(AIRBNB)).toEqual({ price: '651,86', currency: 'EUR' }); }); + it('falls back to a standalone ¥ voucher price (JPY) with no nearby label', () => { + const voucher = 'Price (consumption tax included)\n金額(消費税込)\nPark Admission Date\n¥9,400\nAdult\n1-Day Passport'; + expect(extractTotalPrice(voucher)).toEqual({ price: '9,400', currency: 'JPY' }); + }); + it('returns null when there is neither a labeled nor a symbol amount', () => { + expect(extractTotalPrice('Just some terms and conditions, no price here.')).toBeNull(); + }); });