mirror of
https://github.com/mauriceboe/TREK.git
synced 2026-06-28 09:41:47 +00:00
refactor(extract): dedupe currency/day helpers, drop redundant casts, support JPY vouchers
Code-audit clean-ups: share one normCurrency between the router and the templates, lift the duplicated nearest-day resolver into formatters.resolveDayId, drop two needless as-unknown-as casts at the fillBookingWideFields call sites, restore routeExtraction's doc comment, and give the broker template readable names. Plus recognise ¥/JPY and fall back to a standalone symbol amount, so a Klook-style voucher whose price sits far from any label still yields a cost.
This commit is contained in:
@@ -1,5 +1,29 @@
|
|||||||
import { describe, it, expect } from 'vitest'
|
import { describe, it, expect } from 'vitest'
|
||||||
import { splitReservationDateTime } from './formatters'
|
import { splitReservationDateTime, resolveDayId } from './formatters'
|
||||||
|
import type { Day } from '../types'
|
||||||
|
|
||||||
|
const days = [
|
||||||
|
{ id: 10, date: '2026-05-03' },
|
||||||
|
{ id: 11, date: '2026-05-04' },
|
||||||
|
{ id: 12, date: '2026-05-22' },
|
||||||
|
] as Day[]
|
||||||
|
|
||||||
|
describe('resolveDayId', () => {
|
||||||
|
it('returns the exact-match day id', () => {
|
||||||
|
expect(resolveDayId(days, '2026-05-04')).toBe(11)
|
||||||
|
})
|
||||||
|
it('accepts a full ISO timestamp', () => {
|
||||||
|
expect(resolveDayId(days, '2026-05-22T13:30:00')).toBe(12)
|
||||||
|
})
|
||||||
|
it('falls back to the nearest day when there is no exact match', () => {
|
||||||
|
expect(resolveDayId(days, '2026-05-05')).toBe(11)
|
||||||
|
})
|
||||||
|
it('returns "" for a missing/invalid date or no days', () => {
|
||||||
|
expect(resolveDayId(days, null)).toBe('')
|
||||||
|
expect(resolveDayId(days, 'not a date')).toBe('')
|
||||||
|
expect(resolveDayId([], '2026-05-04')).toBe('')
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
describe('splitReservationDateTime', () => {
|
describe('splitReservationDateTime', () => {
|
||||||
it('parses full ISO datetime', () => {
|
it('parses full ISO datetime', () => {
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import type { AssignmentsMap } from '../types'
|
import type { AssignmentsMap, Day } from '../types'
|
||||||
|
|
||||||
// Collapses verbose Nominatim display_name strings (e.g. "Place, 1, Road, Neighbourhood,
|
// Collapses verbose Nominatim display_name strings (e.g. "Place, 1, Road, Neighbourhood,
|
||||||
// City, County, State, Country, Postcode, Country") into "Place, Postcode, Country".
|
// City, County, State, Country, Postcode, Country") into "Place, Postcode, Country".
|
||||||
@@ -129,6 +129,27 @@ export function splitReservationDateTime(value?: string | null): { date: string
|
|||||||
return { date: null, time: null }
|
return { date: null, time: null }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolve a date to the id of the trip day it belongs to.
 *
 * `value` may be a plain `YYYY-MM-DD` date or an ISO timestamp; only its first ten
 * characters (after trimming surrounding whitespace) are considered. An exact match on
 * `day.date` wins; otherwise the day whose date is closest is returned, so a booking that
 * falls outside the trip still lands on a day. When two days are equally close, the one
 * listed first in `days` is kept.
 *
 * @param days  Trip days to search; entries with an empty or unparseable date never win.
 * @param value Date or ISO timestamp to resolve; may be null/undefined.
 * @returns The matching day id, or '' when `value` is missing, is not a real calendar date,
 *          or no day has a usable date — callers read '' as "no day selected".
 */
export function resolveDayId(days: Day[], value: string | null | undefined): Day['id'] | '' {
  const date = value ? String(value).trim().slice(0, 10) : ''
  // Validate strictly up front: `new Date('2026-02-31')` is engine-dependent (some engines
  // roll over into March, others yield Invalid Date), which would make the nearest-day
  // fallback differ between runtimes.
  const target = utcMidnightMs(date)
  if (Number.isNaN(target) || days.length === 0) return ''

  const exact = days.find(d => d.date === date)
  if (exact) return exact.id

  let best: Day['id'] | '' = ''
  let bestDiff = Infinity
  for (const d of days) {
    if (!d.date) continue
    // An unparseable day date gives NaN here; `NaN < bestDiff` is false, so it is skipped.
    const diff = Math.abs(new Date(d.date).getTime() - target)
    if (diff < bestDiff) {
      bestDiff = diff
      best = d.id
    }
  }
  return best
}

/** Epoch milliseconds of UTC midnight for a strict `YYYY-MM-DD` date, or NaN if it is not a real date. */
function utcMidnightMs(isoDate: string): number {
  const m = /^(\d{4})-(\d{2})-(\d{2})$/.exec(isoDate)
  if (!m) return NaN
  const year = Number(m[1])
  const month = Number(m[2]) - 1
  const day = Number(m[3])
  // setUTCFullYear (unlike Date.UTC) does not remap years 0–99 into 1900–1999.
  const probe = new Date(0)
  const ms = probe.setUTCFullYear(year, month, day)
  // Out-of-range parts (month 13, Feb 31, day 00) roll over; a failed round-trip exposes them.
  const roundTrips =
    probe.getUTCFullYear() === year && probe.getUTCMonth() === month && probe.getUTCDate() === day
  return roundTrips ? ms : NaN
}
|
||||||
|
|
||||||
export function dayTotalCost(dayId: number, assignments: AssignmentsMap, currency: string): string | null {
|
export function dayTotalCost(dayId: number, assignments: AssignmentsMap, currency: string): string | null {
|
||||||
const da = assignments[String(dayId)] || []
|
const da = assignments[String(dayId)] || []
|
||||||
const total = da.reduce((s, a) => s + (parseFloat(String(a.place?.price ?? '')) || 0), 0)
|
const total = da.reduce((s, a) => s + (parseFloat(String(a.place?.price ?? '')) || 0), 0)
|
||||||
|
|||||||
@@ -146,6 +146,7 @@ function parseCurrency(...candidates: unknown[]): string | undefined {
|
|||||||
if (s.includes('€') || /\bEUR\b/.test(s)) return 'EUR';
|
if (s.includes('€') || /\bEUR\b/.test(s)) return 'EUR';
|
||||||
if (s.includes('£') || /\bGBP\b/.test(s)) return 'GBP';
|
if (s.includes('£') || /\bGBP\b/.test(s)) return 'GBP';
|
||||||
if (s.includes('$') || /\bUSD\b/.test(s)) return 'USD';
|
if (s.includes('$') || /\bUSD\b/.test(s)) return 'USD';
|
||||||
|
if (s.includes('¥') || /\bJPY\b/.test(s)) return 'JPY';
|
||||||
const iso = s.match(/\b([A-Z]{3})\b/);
|
const iso = s.match(/\b([A-Z]{3})\b/);
|
||||||
if (iso) return iso[1];
|
if (iso) return iso[1];
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ import type { KiReservation } from '../../booking-import/kitinerary.types';
|
|||||||
import { nuExtractToKiReservations } from '../clients/nuextract';
|
import { nuExtractToKiReservations } from '../clients/nuextract';
|
||||||
import { FLAT_SCHEMA_BY_TYPE, FLIGHTS_ARRAY_SCHEMA, UNION_SINGLE_SCHEMA, type FlatType } from './flat-schemas';
|
import { FLAT_SCHEMA_BY_TYPE, FLIGHTS_ARRAY_SCHEMA, UNION_SINGLE_SCHEMA, type FlatType } from './flat-schemas';
|
||||||
import { extractEnforced } from './ollama-format.client';
|
import { extractEnforced } from './ollama-format.client';
|
||||||
import { matchVendorTemplate } from './vendor-templates';
|
import { matchVendorTemplate, normCurrency } from './vendor-templates';
|
||||||
import type { FlatLike } from './validate';
|
import type { FlatLike } from './validate';
|
||||||
|
|
||||||
export interface RouterContext {
|
export interface RouterContext {
|
||||||
@@ -84,23 +84,19 @@ export function extractBookingRef(text: string): string | undefined {
|
|||||||
return m?.[1];
|
return m?.[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Currency symbol/code → ISO 4217. */
|
|
||||||
function normCurrency(s: string): string | undefined {
|
|
||||||
const u = s.toUpperCase();
|
|
||||||
if (u.includes('€') || u === 'EUR') return 'EUR';
|
|
||||||
if (u.includes('$') || u === 'USD') return 'USD';
|
|
||||||
if (u.includes('£') || u === 'GBP') return 'GBP';
|
|
||||||
if (/^[A-Z]{3}$/.test(u)) return u;
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * The booking total, pulled deterministically from the document text.
 *
 * Tries a labeled total first ("Gesamtpreis: 1.234,56 €", "Total Amount 99 USD",
 * "Bezahlter Betrag 651,86 €"). If no label matches, falls back to the first line that
 * starts with a currency symbol followed by an amount (e.g. a voucher's "¥9,400"), for
 * documents whose price sits far from any label.
 *
 * @param text Plain text extracted from the booking document.
 * @returns `price` is the raw amount with currency symbols and whitespace removed
 *          (separators stay as written, e.g. "1.234,56"); `currency` is what `normCurrency`
 *          makes of the trailing currency token or, failing that, of the amount's own
 *          symbol, and may be undefined. `null` when no amount is found.
 */
export function extractTotalPrice(text: string): { price: string; currency?: string } | null {
  const strip = (s: string) => s.replace(/[€$£¥\s]/g, '');

  // A labeled total. The amount must end on a digit so that sentence punctuation
  // ("Total: 99.50.") is not swallowed into the price; stray trailing separators are
  // consumed outside the capture so a following currency code is still picked up.
  const labeled = text.match(
    /(?:Gesamtpreis|Gesamtbetrag|Gesamtsumme|Total(?:\s*(?:price|amount))?|Amount|Summe|Betrag)\s*:?\s*([€$£¥]?\s*\d(?:[\d.,]*\d)?)[.,]*\s*(EUR|USD|GBP|CHF|JPY|€|\$|£|¥)?/i,
  );
  if (labeled) {
    return { price: strip(labeled[1]), currency: normCurrency(labeled[2] ?? labeled[1]) };
  }

  // Fallback: a standalone amount carrying a currency symbol at the start of a line.
  const symbol = text.match(/^\s*([€$£¥]\s?\d(?:[\d.,]*\d)?)\b/m);
  if (symbol) {
    return { price: strip(symbol[1]), currency: normCurrency(symbol[1]) };
  }

  return null;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -177,10 +173,6 @@ async function extractSingle(text: string, ctx: RouterContext): Promise<FlatLike
|
|||||||
return fixArrivalDate(normalizeDates({ ...out, type }));
|
return fixArrivalDate(normalizeDates({ ...out, type }));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Run the router on extracted document text and return schema.org KiReservation nodes.
|
|
||||||
* Returns `[]` (never throws for content reasons) so the caller degrades gracefully.
|
|
||||||
*/
|
|
||||||
/**
|
/**
|
||||||
* Schicht 2 — fill the booking-wide fields the per-reservation extraction doesn't carry:
|
* Schicht 2 — fill the booking-wide fields the per-reservation extraction doesn't carry:
|
||||||
* the confirmation/PNR and the booking total. Applied to BOTH the deterministic vendor
|
* the confirmation/PNR and the booking total. Applied to BOTH the deterministic vendor
|
||||||
@@ -188,7 +180,7 @@ async function extractSingle(text: string, ctx: RouterContext): Promise<FlatLike
|
|||||||
* whose narrow ref/price regex missed still gets the broad doc-wide deterministic value.
|
* whose narrow ref/price regex missed still gets the broad doc-wide deterministic value.
|
||||||
* Never overrides a value the source already provided.
|
* Never overrides a value the source already provided.
|
||||||
*/
|
*/
|
||||||
function fillBookingWideFields(flats: Array<Record<string, unknown>>, text: string): void {
|
function fillBookingWideFields(flats: Record<string, unknown>[], text: string): void {
|
||||||
const ref = extractBookingRef(text);
|
const ref = extractBookingRef(text);
|
||||||
const total = extractTotalPrice(text);
|
const total = extractTotalPrice(text);
|
||||||
// A small model sometimes emits an empty string for a price it didn't find, which is
|
// A small model sometimes emits an empty string for a price it didn't find, which is
|
||||||
@@ -204,6 +196,10 @@ function fillBookingWideFields(flats: Array<Record<string, unknown>>, text: stri
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run the router on extracted document text and return schema.org KiReservation nodes.
|
||||||
|
* Returns `[]` (never throws for content reasons) so the caller degrades gracefully.
|
||||||
|
*/
|
||||||
export async function routeExtraction(text: string, ctx: RouterContext): Promise<{ kiItems: KiReservation[]; warnings: string[] }> {
|
export async function routeExtraction(text: string, ctx: RouterContext): Promise<{ kiItems: KiReservation[]; warnings: string[] }> {
|
||||||
const warnings: string[] = [];
|
const warnings: string[] = [];
|
||||||
|
|
||||||
@@ -212,7 +208,7 @@ export async function routeExtraction(text: string, ctx: RouterContext): Promise
|
|||||||
// deterministic extractor would have found them.
|
// deterministic extractor would have found them.
|
||||||
const vendor = matchVendorTemplate(text);
|
const vendor = matchVendorTemplate(text);
|
||||||
if (vendor && vendor.length > 0) {
|
if (vendor && vendor.length > 0) {
|
||||||
fillBookingWideFields(vendor as unknown as Array<Record<string, unknown>>, text);
|
fillBookingWideFields(vendor, text);
|
||||||
return { kiItems: nuExtractToKiReservations(vendor) as unknown as KiReservation[], warnings };
|
return { kiItems: nuExtractToKiReservations(vendor) as unknown as KiReservation[], warnings };
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -225,7 +221,7 @@ export async function routeExtraction(text: string, ctx: RouterContext): Promise
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Schicht 2 — deterministic booking-wide fields the per-call schema doesn't carry.
|
// Schicht 2 — deterministic booking-wide fields the per-call schema doesn't carry.
|
||||||
fillBookingWideFields(flats as unknown as Array<Record<string, unknown>>, text);
|
fillBookingWideFields(flats, text);
|
||||||
|
|
||||||
const kiItems = nuExtractToKiReservations(flats as unknown as Record<string, unknown>[]) as unknown as KiReservation[];
|
const kiItems = nuExtractToKiReservations(flats as unknown as Record<string, unknown>[]) as unknown as KiReservation[];
|
||||||
return { kiItems, warnings };
|
return { kiItems, warnings };
|
||||||
|
|||||||
@@ -88,14 +88,19 @@ function enDateTime(text: string): string | null {
|
|||||||
return `${date}T${String(h).padStart(2, '0')}:${m[5]}:00`;
|
return `${date}T${String(h).padStart(2, '0')}:${m[5]}:00`;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Map a currency symbol or code to its ISO 4217 code.
 *
 * A token containing €, $, £ or ¥ (half- or full-width) maps to EUR, USD, GBP or JPY
 * respectively, so an amount string such as "¥9,400" works as input. Otherwise the token,
 * trimmed and upper-cased, is returned when it is exactly three letters A–Z.
 *
 * @param token Currency symbol, code, or an amount string carrying a symbol.
 * @returns The ISO code, or undefined when none is recognised.
 */
export function normCurrency(token: string): string | undefined {
  // Trim first: a padded code (" USD") would otherwise fail the three-letter check and be
  // treated as unrecognised.
  const u = token.trim().toUpperCase();
  if (u.includes('€')) return 'EUR';
  if (u.includes('$')) return 'USD';
  if (u.includes('£')) return 'GBP';
  // '\uFFE5' is the full-width yen sign "￥".
  if (u.includes('¥') || u.includes('\uFFE5')) return 'JPY';
  return /^[A-Z]{3}$/.test(u) ? u : undefined;
}

/** Same as `normCurrency`, but defaults to EUR for the EU-centric broker vouchers. */
function moneyCurrency(token: string | undefined): string {
  return normCurrency(token ?? '') ?? 'EUR';
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -180,11 +185,11 @@ const brokerRental: VendorTemplate = {
|
|||||||
const ref = t.match(/Reservation\s*No\.?:?\s*([A-Z0-9]{5,})/i)?.[1];
|
const ref = t.match(/Reservation\s*No\.?:?\s*([A-Z0-9]{5,})/i)?.[1];
|
||||||
const block = (label: RegExp) =>
|
const block = (label: RegExp) =>
|
||||||
t.match(new RegExp(label.source + String.raw`\s*\n([^\n]+)\n([A-Za-z]{3,}\.?\s+\d{1,2},?\s+\d{4}[^\n]*)`, 'i'));
|
t.match(new RegExp(label.source + String.raw`\s*\n([^\n]+)\n([A-Za-z]{3,}\.?\s+\d{1,2},?\s+\d{4}[^\n]*)`, 'i'));
|
||||||
const pu = block(/PICK-?UP DETAILS/);
|
const pickup = block(/PICK-?UP DETAILS/);
|
||||||
const dof = block(/DROP-?OFF DETAILS/);
|
const dropoff = block(/DROP-?OFF DETAILS/);
|
||||||
const puTime = pu ? enDateTime(pu[2]) : null;
|
const pickupTime = pickup ? enDateTime(pickup[2]) : null;
|
||||||
const doTime = dof ? enDateTime(dof[2]) : null;
|
const dropoffTime = dropoff ? enDateTime(dropoff[2]) : null;
|
||||||
if (!ref || !pu || !dof || !puTime || !doTime) return [];
|
if (!ref || !pickup || !dropoff || !pickupTime || !dropoffTime) return [];
|
||||||
const company = t
|
const company = t
|
||||||
.match(/SUPPLIER DETAILS\s*\n([^\n]+?)(?:\s+Supplier Reference|\n|$)/i)?.[1]
|
.match(/SUPPLIER DETAILS\s*\n([^\n]+?)(?:\s+Supplier Reference|\n|$)/i)?.[1]
|
||||||
?.trim()
|
?.trim()
|
||||||
@@ -200,10 +205,10 @@ const brokerRental: VendorTemplate = {
|
|||||||
type: 'car',
|
type: 'car',
|
||||||
...(company ? { operator: company } : {}),
|
...(company ? { operator: company } : {}),
|
||||||
booking_reference: ref,
|
booking_reference: ref,
|
||||||
from_name: pu[1].trim(),
|
from_name: pickup[1].trim(),
|
||||||
to_name: dof[1].trim(),
|
to_name: dropoff[1].trim(),
|
||||||
departure_time: puTime,
|
departure_time: pickupTime,
|
||||||
arrival_time: doTime,
|
arrival_time: dropoffTime,
|
||||||
...(price ? { price, currency: moneyCurrency(priceM![1] ?? priceM![4]) } : {}),
|
...(price ? { price, currency: moneyCurrency(priceM![1] ?? priceM![4]) } : {}),
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|||||||
@@ -150,4 +150,11 @@ describe('extractTotalPrice', () => {
|
|||||||
it('reads an Airbnb "Bezahlter Betrag"', () => {
|
it('reads an Airbnb "Bezahlter Betrag"', () => {
|
||||||
expect(extractTotalPrice(AIRBNB)).toEqual({ price: '651,86', currency: 'EUR' });
|
expect(extractTotalPrice(AIRBNB)).toEqual({ price: '651,86', currency: 'EUR' });
|
||||||
});
|
});
|
||||||
|
it('falls back to a standalone ¥ voucher price (JPY) with no nearby label', () => {
|
||||||
|
const voucher = 'Price (consumption tax included)\n金額(消費税込)\nPark Admission Date\n¥9,400\nAdult\n1-Day Passport';
|
||||||
|
expect(extractTotalPrice(voucher)).toEqual({ price: '9,400', currency: 'JPY' });
|
||||||
|
});
|
||||||
|
it('returns null when there is neither a labeled nor a symbol amount', () => {
|
||||||
|
expect(extractTotalPrice('Just some terms and conditions, no price here.')).toBeNull();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user