mirror of
https://github.com/mauriceboe/TREK.git
synced 2026-06-28 01:31:47 +00:00
refactor(extract): dedupe currency/day helpers, drop redundant casts, support JPY vouchers
Code-audit clean-ups: share one normCurrency between the router and the templates, lift the duplicated nearest-day resolver into formatters.resolveDayId, drop two needless as-unknown-as casts at the fillBookingWideFields call sites, restore routeExtraction's doc comment, and give the broker template readable names. Plus recognise ¥/JPY and fall back to a standalone symbol amount, so a Klook-style voucher whose price sits far from any label still yields a cost.
This commit is contained in:
@@ -1,5 +1,29 @@
|
||||
import { describe, it, expect } from 'vitest'
|
||||
import { splitReservationDateTime } from './formatters'
|
||||
import { splitReservationDateTime, resolveDayId } from './formatters'
|
||||
import type { Day } from '../types'
|
||||
|
||||
// Fixture: two adjacent trip days plus one much later, so exact, nearest and
// out-of-range lookups each resolve to a different id.
const days = [
  { id: 10, date: '2026-05-03' },
  { id: 11, date: '2026-05-04' },
  { id: 12, date: '2026-05-22' },
] as Day[]

describe('resolveDayId', () => {
  it('returns the exact-match day id', () => {
    expect(resolveDayId(days, '2026-05-04')).toBe(11)
  })

  it('accepts a full ISO timestamp', () => {
    expect(resolveDayId(days, '2026-05-22T13:30:00')).toBe(12)
  })

  it('falls back to the nearest day when there is no exact match', () => {
    expect(resolveDayId(days, '2026-05-05')).toBe(11)
  })

  it('lands an out-of-range date on the closest end of the trip', () => {
    expect(resolveDayId(days, '2026-01-01')).toBe(10)
    expect(resolveDayId(days, '2026-06-30')).toBe(12)
  })

  it('returns "" for a missing/invalid date or no days', () => {
    expect(resolveDayId(days, null)).toBe('')
    expect(resolveDayId(days, undefined)).toBe('')
    expect(resolveDayId(days, 'not a date')).toBe('')
    expect(resolveDayId([], '2026-05-04')).toBe('')
  })
})
|
||||
|
||||
describe('splitReservationDateTime', () => {
|
||||
it('parses full ISO datetime', () => {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { AssignmentsMap } from '../types'
|
||||
import type { AssignmentsMap, Day } from '../types'
|
||||
|
||||
// Collapses verbose Nominatim display_name strings (e.g. "Place, 1, Road, Neighbourhood,
|
||||
// City, County, State, Country, Postcode, Country") into "Place, Postcode, Country".
|
||||
@@ -129,6 +129,27 @@ export function splitReservationDateTime(value?: string | null): { date: string
|
||||
return { date: null, time: null }
|
||||
}
|
||||
|
||||
/**
 * Resolve a date (YYYY-MM-DD or an ISO timestamp) to a trip day id: exact match, else the
 * nearest day so an out-of-range booking still lands on one. Returns '' when there is no
 * usable date or the trip has no days — callers read that as "no day selected".
 *
 * Only the first ten characters (the calendar date) of both `value` and each day's `date`
 * are compared, so a time-of-day on either side never affects the result.
 *
 * @param days  Trip days to search; days without a `date` are ignored.
 * @param value Date or timestamp string to place; null/undefined/empty yields ''.
 * @returns The id of the matching or nearest day, or '' when nothing can be resolved.
 *          When two days are equally near, the one that comes first in `days` wins.
 */
export function resolveDayId(days: Day[], value: string | null | undefined): Day['id'] | '' {
  const date = value ? String(value).slice(0, 10) : ''
  if (!/^\d{4}-\d{2}-\d{2}$/.test(date) || days.length === 0) return ''

  // Normalise a day's date the same way as the input so both sides are plain calendar dates.
  const dayDate = (d: Day): string => (d.date ? String(d.date).slice(0, 10) : '')

  const exact = days.find(d => dayDate(d) === date)
  if (exact) return exact.id

  // Date-only strings parse as UTC midnight, so the differences below are whole days.
  const target = Date.parse(date)
  if (Number.isNaN(target)) return ''

  let best: Day['id'] | '' = ''
  let bestDiff = Infinity
  for (const d of days) {
    // A missing or unparseable day date gives NaN, which never beats bestDiff and is skipped.
    const diff = Math.abs(Date.parse(dayDate(d)) - target)
    if (diff < bestDiff) { bestDiff = diff; best = d.id }
  }
  return best
}
|
||||
|
||||
export function dayTotalCost(dayId: number, assignments: AssignmentsMap, currency: string): string | null {
|
||||
const da = assignments[String(dayId)] || []
|
||||
const total = da.reduce((s, a) => s + (parseFloat(String(a.place?.price ?? '')) || 0), 0)
|
||||
|
||||
@@ -146,6 +146,7 @@ function parseCurrency(...candidates: unknown[]): string | undefined {
|
||||
if (s.includes('€') || /\bEUR\b/.test(s)) return 'EUR';
|
||||
if (s.includes('£') || /\bGBP\b/.test(s)) return 'GBP';
|
||||
if (s.includes('$') || /\bUSD\b/.test(s)) return 'USD';
|
||||
if (s.includes('¥') || /\bJPY\b/.test(s)) return 'JPY';
|
||||
const iso = s.match(/\b([A-Z]{3})\b/);
|
||||
if (iso) return iso[1];
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ import type { KiReservation } from '../../booking-import/kitinerary.types';
|
||||
import { nuExtractToKiReservations } from '../clients/nuextract';
|
||||
import { FLAT_SCHEMA_BY_TYPE, FLIGHTS_ARRAY_SCHEMA, UNION_SINGLE_SCHEMA, type FlatType } from './flat-schemas';
|
||||
import { extractEnforced } from './ollama-format.client';
|
||||
import { matchVendorTemplate } from './vendor-templates';
|
||||
import { matchVendorTemplate, normCurrency } from './vendor-templates';
|
||||
import type { FlatLike } from './validate';
|
||||
|
||||
export interface RouterContext {
|
||||
@@ -84,23 +84,19 @@ export function extractBookingRef(text: string): string | undefined {
|
||||
return m?.[1];
|
||||
}
|
||||
|
||||
/**
 * Currency symbol/code → ISO 4217.
 *
 * The input is upper-cased first. A string containing €, $ or £ (or equal to EUR, USD or
 * GBP) maps to that code; any other string of exactly three A–Z letters is returned
 * upper-cased as-is; everything else yields undefined. ¥ is not recognised here.
 *
 * NOTE(review): the import list in this diff also pulls `normCurrency` from
 * './vendor-templates' — presumably this local copy is the one being removed; confirm.
 */
|
||||
function normCurrency(s: string): string | undefined {
|
||||
const u = s.toUpperCase();
|
||||
if (u.includes('€') || u === 'EUR') return 'EUR';
|
||||
if (u.includes('$') || u === 'USD') return 'USD';
|
||||
if (u.includes('£') || u === 'GBP') return 'GBP';
|
||||
if (/^[A-Z]{3}$/.test(u)) return u;
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * The booking total, pulled deterministically (raw amount string + ISO currency).
 *
 * Two passes over the document text:
 *  1. A labeled total ("Gesamtpreis: 1.234,56 €", "Total Amount 99 USD",
 *     "Bezahlter Betrag 651,86 €").
 *  2. Fallback: a standalone amount carrying a currency symbol at the start of a line
 *     (e.g. a voucher's "¥9,400") — the price sits far from any label pass 1 can anchor to.
 *
 * @param text Extracted document text.
 * @returns `price` is the matched amount with currency symbols and whitespace removed
 *          (separators are kept verbatim, e.g. "1.234,56"); `currency` is whatever
 *          `normCurrency` makes of the trailing code/symbol, or of the amount itself when
 *          no trailing token was matched. Returns null when neither pass matches.
 */
export function extractTotalPrice(text: string): { price: string; currency?: string } | null {
  const strip = (s: string) => s.replace(/[€$£¥\s]/g, '');

  // The amount must end on a digit so sentence punctuation ("Total: 99.00.") is not
  // captured as part of the price.
  const labeled = text.match(
    /(?:Gesamtpreis|Gesamtbetrag|Gesamtsumme|Total(?:\s*(?:price|amount))?|Amount|Summe|Betrag)\s*:?\s*([€$£¥]?\s*\d(?:[\d.,]*\d)?)\s*(EUR|USD|GBP|CHF|JPY|€|\$|£|¥)?/i,
  );
  if (labeled) return { price: strip(labeled[1]), currency: normCurrency(labeled[2] ?? labeled[1]) };

  const symbol = text.match(/^\s*([€$£¥]\s?\d(?:[\d.,]*\d)?)\b/m);
  if (symbol) return { price: strip(symbol[1]), currency: normCurrency(symbol[1]) };

  return null;
}
|
||||
|
||||
/**
|
||||
@@ -177,10 +173,6 @@ async function extractSingle(text: string, ctx: RouterContext): Promise<FlatLike
|
||||
return fixArrivalDate(normalizeDates({ ...out, type }));
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the router on extracted document text and return schema.org KiReservation nodes.
|
||||
* Returns `[]` (never throws for content reasons) so the caller degrades gracefully.
|
||||
*/
|
||||
/**
|
||||
* Schicht 2 — fill the booking-wide fields the per-reservation extraction doesn't carry:
|
||||
* the confirmation/PNR and the booking total. Applied to BOTH the deterministic vendor
|
||||
@@ -188,7 +180,7 @@ async function extractSingle(text: string, ctx: RouterContext): Promise<FlatLike
|
||||
* whose narrow ref/price regex missed still gets the broad doc-wide deterministic value.
|
||||
* Never overrides a value the source already provided.
|
||||
*/
|
||||
function fillBookingWideFields(flats: Array<Record<string, unknown>>, text: string): void {
|
||||
function fillBookingWideFields(flats: Record<string, unknown>[], text: string): void {
|
||||
const ref = extractBookingRef(text);
|
||||
const total = extractTotalPrice(text);
|
||||
// A small model sometimes emits an empty string for a price it didn't find, which is
|
||||
@@ -204,6 +196,10 @@ function fillBookingWideFields(flats: Array<Record<string, unknown>>, text: stri
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the router on extracted document text and return schema.org KiReservation nodes.
|
||||
* Returns `[]` (never throws for content reasons) so the caller degrades gracefully.
|
||||
*/
|
||||
export async function routeExtraction(text: string, ctx: RouterContext): Promise<{ kiItems: KiReservation[]; warnings: string[] }> {
|
||||
const warnings: string[] = [];
|
||||
|
||||
@@ -212,7 +208,7 @@ export async function routeExtraction(text: string, ctx: RouterContext): Promise
|
||||
// deterministic extractor would have found them.
|
||||
const vendor = matchVendorTemplate(text);
|
||||
if (vendor && vendor.length > 0) {
|
||||
fillBookingWideFields(vendor as unknown as Array<Record<string, unknown>>, text);
|
||||
fillBookingWideFields(vendor, text);
|
||||
return { kiItems: nuExtractToKiReservations(vendor) as unknown as KiReservation[], warnings };
|
||||
}
|
||||
|
||||
@@ -225,7 +221,7 @@ export async function routeExtraction(text: string, ctx: RouterContext): Promise
|
||||
}
|
||||
|
||||
// Schicht 2 — deterministic booking-wide fields the per-call schema doesn't carry.
|
||||
fillBookingWideFields(flats as unknown as Array<Record<string, unknown>>, text);
|
||||
fillBookingWideFields(flats, text);
|
||||
|
||||
const kiItems = nuExtractToKiReservations(flats as unknown as Record<string, unknown>[]) as unknown as KiReservation[];
|
||||
return { kiItems, warnings };
|
||||
|
||||
@@ -88,14 +88,19 @@ function enDateTime(text: string): string | null {
|
||||
return `${date}T${String(h).padStart(2, '0')}:${m[5]}:00`;
|
||||
}
|
||||
|
||||
/**
 * Symbol/code → ISO 4217, or undefined when none is recognised.
 *
 * The token is trimmed and upper-cased first. A token containing €, $, £ or ¥ maps to
 * EUR, USD, GBP or JPY respectively (so an amount such as "¥9,400" works too); otherwise
 * a token of exactly three A–Z letters is returned upper-cased as-is.
 *
 * @param token Currency symbol, ISO code, or a string containing a currency symbol.
 * @returns The ISO 4217 code, or undefined when the token carries no recognisable currency.
 */
export function normCurrency(token: string): string | undefined {
  // Trim so a padded code (" usd ") is still recognised instead of falling through.
  const u = token.trim().toUpperCase();
  if (u.includes('€')) return 'EUR';
  if (u.includes('$')) return 'USD';
  if (u.includes('£')) return 'GBP';
  if (u.includes('¥')) return 'JPY';
  return /^[A-Z]{3}$/.test(u) ? u : undefined;
}

/**
 * Same, but defaults to EUR for the EU-centric broker vouchers.
 *
 * @param token Currency symbol/code, or undefined when the voucher showed none.
 * @returns The recognised ISO 4217 code, or 'EUR' when the token is missing or unrecognised.
 */
function moneyCurrency(token: string | undefined): string {
  return normCurrency(token ?? '') ?? 'EUR';
}
|
||||
|
||||
/**
|
||||
@@ -180,11 +185,11 @@ const brokerRental: VendorTemplate = {
|
||||
const ref = t.match(/Reservation\s*No\.?:?\s*([A-Z0-9]{5,})/i)?.[1];
|
||||
const block = (label: RegExp) =>
|
||||
t.match(new RegExp(label.source + String.raw`\s*\n([^\n]+)\n([A-Za-z]{3,}\.?\s+\d{1,2},?\s+\d{4}[^\n]*)`, 'i'));
|
||||
const pu = block(/PICK-?UP DETAILS/);
|
||||
const dof = block(/DROP-?OFF DETAILS/);
|
||||
const puTime = pu ? enDateTime(pu[2]) : null;
|
||||
const doTime = dof ? enDateTime(dof[2]) : null;
|
||||
if (!ref || !pu || !dof || !puTime || !doTime) return [];
|
||||
const pickup = block(/PICK-?UP DETAILS/);
|
||||
const dropoff = block(/DROP-?OFF DETAILS/);
|
||||
const pickupTime = pickup ? enDateTime(pickup[2]) : null;
|
||||
const dropoffTime = dropoff ? enDateTime(dropoff[2]) : null;
|
||||
if (!ref || !pickup || !dropoff || !pickupTime || !dropoffTime) return [];
|
||||
const company = t
|
||||
.match(/SUPPLIER DETAILS\s*\n([^\n]+?)(?:\s+Supplier Reference|\n|$)/i)?.[1]
|
||||
?.trim()
|
||||
@@ -200,10 +205,10 @@ const brokerRental: VendorTemplate = {
|
||||
type: 'car',
|
||||
...(company ? { operator: company } : {}),
|
||||
booking_reference: ref,
|
||||
from_name: pu[1].trim(),
|
||||
to_name: dof[1].trim(),
|
||||
departure_time: puTime,
|
||||
arrival_time: doTime,
|
||||
from_name: pickup[1].trim(),
|
||||
to_name: dropoff[1].trim(),
|
||||
departure_time: pickupTime,
|
||||
arrival_time: dropoffTime,
|
||||
...(price ? { price, currency: moneyCurrency(priceM![1] ?? priceM![4]) } : {}),
|
||||
},
|
||||
];
|
||||
|
||||
@@ -150,4 +150,11 @@ describe('extractTotalPrice', () => {
|
||||
it('reads an Airbnb "Bezahlter Betrag"', () => {
|
||||
expect(extractTotalPrice(AIRBNB)).toEqual({ price: '651,86', currency: 'EUR' });
|
||||
});
|
||||
it('falls back to a standalone ¥ voucher price (JPY) with no nearby label', () => {
|
||||
const voucher = 'Price (consumption tax included)\n金額(消費税込)\nPark Admission Date\n¥9,400\nAdult\n1-Day Passport';
|
||||
expect(extractTotalPrice(voucher)).toEqual({ price: '9,400', currency: 'JPY' });
|
||||
});
|
||||
it('returns null when there is neither a labeled nor a symbol amount', () => {
|
||||
expect(extractTotalPrice('Just some terms and conditions, no price here.')).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user