production fixes

This commit is contained in:
lovebird 2026-03-29 03:12:14 +02:00
parent f30b9a6f46
commit 02700a15f1
5 changed files with 229 additions and 179 deletions

192
dist/gpkg-reader.js vendored

File diff suppressed because one or more lines are too long

11
dist/wrapper.js vendored

File diff suppressed because one or more lines are too long

View File

@@ -395,117 +395,133 @@ export async function getBoundaryFromGpkg(
if (externalCache) {
try {
const cached = await externalCache.get(cacheKey);
if (cached) return cached as BoundaryResult;
} catch (e) { /* ignore */ }
} else {
const countryCode = gadmId.split('.')[0];
for (const dir of uniqueCacheDirs) {
const cacheFile = join(dir, countryCode, `${cacheKey}.json`);
if (existsSync(cacheFile)) {
try {
return JSON.parse(readFileSync(cacheFile, 'utf-8')) as BoundaryResult;
} catch (e) {
console.warn(`[gpkg-reader] Failed to read cache from ${cacheFile}:`, e);
}
if (cached) {
return cached as BoundaryResult;
}
} catch (e) {
console.warn(`[gpkg-reader] External cache error for ${cacheKey}:`, e);
}
}
// fallback old flat structure
const oldCacheFile = join(dir, `${cacheKey}.json`);
if (existsSync(oldCacheFile)) {
try {
return JSON.parse(readFileSync(oldCacheFile, 'utf-8')) as BoundaryResult;
} catch (e) {
console.warn(`[gpkg-reader] Failed to read backup cache from ${oldCacheFile}:`, e);
}
const countryCode = gadmId.split('.')[0];
console.log(`----------------- [gpkg-reader] Checking local FS cache for ${cacheKey} (Country: ${countryCode})`);
for (const dir of uniqueCacheDirs) {
const cacheFile = join(dir, countryCode, `${cacheKey}.json`);
console.log(`[gpkg-reader] Checking exact cache file: ${cacheFile}`);
if (existsSync(cacheFile)) {
try {
console.log(`[gpkg-reader] HIT exact cache file: ${cacheFile}`);
return JSON.parse(readFileSync(cacheFile, 'utf-8')) as BoundaryResult;
} catch (e) {
console.warn(`[gpkg-reader] Failed to read cache from ${cacheFile}:`, e);
}
} else {
console.log(`[gpkg-reader] MISS exact cache file: ${cacheFile}`);
}
// Fallback: check for C++ pre-built cache (boundary_{COUNTRY}_{LEVEL}.json)
// The C++ pipeline outputs one file per country per level, containing ALL features.
// For sub-region queries (e.g. 'DEU.2_1' at level 3), we read the full country file
// and filter by GID prefix — since GADM GIDs are hierarchical (DEU.2.91.1_1 ⊂ DEU.2_1).
const dotCount = (gadmId.match(/\./g) || []).length;
const gidLevel = dotCount;
const resolvedLevel = contentLevel != null ? contentLevel : gidLevel;
// fallback old flat structure
const oldCacheFile = join(dir, `${cacheKey}.json`);
console.log(`[gpkg-reader] Checking flat cache file: ${oldCacheFile}`);
if (existsSync(oldCacheFile)) {
try {
console.log(`[gpkg-reader] HIT flat cache file: ${oldCacheFile}`);
return JSON.parse(readFileSync(oldCacheFile, 'utf-8')) as BoundaryResult;
} catch (e) {
console.warn(`[gpkg-reader] Failed to read backup cache from ${oldCacheFile}:`, e);
}
} else {
console.log(`[gpkg-reader] MISS flat cache file: ${oldCacheFile}`);
}
}
// C++ outputs sub-region precise files too if batched with `--split-levels`.
// We look for exact gadmId match first, then fall back to full country.
const fallbackNames = [
`boundary_${gadmId}_${resolvedLevel}.json`,
`boundary_${countryCode}_${resolvedLevel}.json`
];
// Fallback: check for C++ pre-built cache (boundary_{COUNTRY}_{LEVEL}.json)
const dotCount = (gadmId.match(/\./g) || []).length;
const gidLevel = dotCount;
const resolvedLevel = contentLevel != null ? contentLevel : gidLevel;
// Remove duplicates if gadmId == countryCode
const uniqueFallbackNames = [...new Set(fallbackNames)];
const fallbackNames = [
`boundary_${gadmId}_${resolvedLevel}.json`,
`boundary_${countryCode}_${resolvedLevel}.json`
];
// Build a prefix for sub-region filtering (used mostly for the full-country fallback)
const isSubRegion = gadmId.includes('.');
const gidPrefix = isSubRegion
? gadmId.replace(/_\d+$/, '') + '.' // strip version suffix, add dot
: null;
const uniqueFallbackNames = [...new Set(fallbackNames)];
for (const cppFileName of uniqueFallbackNames) {
for (const dir of uniqueCacheDirs) {
let cppFile = join(dir, countryCode, cppFileName);
if (!existsSync(cppFile)) {
cppFile = join(dir, cppFileName);
}
if (existsSync(cppFile)) {
try {
const raw = JSON.parse(readFileSync(cppFile, 'utf-8'));
if (raw.features && Array.isArray(raw.features)) {
console.log(`[gpkg-reader] Loading from CPP Cache File ${cppFile} : ${gidPrefix || 'country-wide'}`)
const isSubRegion = gadmId.includes('.');
const baseGid = isSubRegion ? gadmId.replace(/_\d+$/, '') : gadmId;
const gidPrefix = isSubRegion ? baseGid + '.' : null;
let rawFeatures = raw.features;
for (const cppFileName of uniqueFallbackNames) {
for (const dir of uniqueCacheDirs) {
let cppFile = join(dir, countryCode, cppFileName);
if (!existsSync(cppFile)) {
cppFile = join(dir, cppFileName);
}
if (existsSync(cppFile)) {
try {
const raw = JSON.parse(readFileSync(cppFile, 'utf-8'));
if (raw.features && Array.isArray(raw.features)) {
let rawFeatures = raw.features;
// Filter by GID prefix if we had to read the fallback country-wide file
// (If we loaded the exact sub-region file, it's already perfectly chunked)
if (gidPrefix && cppFileName.includes(countryCode) && !cppFileName.includes(gadmId)) {
rawFeatures = rawFeatures.filter((f: any) =>
f.code && f.code.startsWith(gidPrefix)
);
}
if (rawFeatures.length === 0) continue; // no matches, try next dir
const features: BoundaryFeature[] = rawFeatures.map((f: any) => {
const { geometry, code, name, ...enrichment } = f;
return {
type: 'Feature' as const,
properties: { name, code, ...enrichment },
geometry,
};
if (gidPrefix && cppFileName.includes(countryCode) && !cppFileName.includes(gadmId)) {
rawFeatures = rawFeatures.filter((f: any) => {
if (!f.code) return false;
if (resolvedLevel === gidLevel) {
return f.code === gadmId || f.code.replace(/_\d+$/, '') === baseGid;
}
return f.code.startsWith(gidPrefix);
});
const result: BoundaryResult = { type: 'FeatureCollection', features };
const targetBase = uniqueCacheDirs[1] || uniqueCacheDirs[0];
const targetDir = join(targetBase, countryCode);
if (!existsSync(targetDir)) {
mkdirSync(targetDir, { recursive: true });
}
const outCacheFile = join(targetDir, `${cacheKey}.json`);
try {
writeFileSync(outCacheFile, JSON.stringify(result));
} catch (e) { /* ignore */ }
return result;
}
} catch (e) {
console.warn(`[gpkg-reader] Failed to read C++ cache from ${cppFile}:`, e);
if (rawFeatures.length === 0) {
continue;
}
const features: BoundaryFeature[] = rawFeatures.map((f: any) => {
const { geometry, code, name, ...enrichment } = f;
return {
type: 'Feature' as const,
properties: { name, code, ...enrichment },
geometry,
};
});
const result: BoundaryResult = { type: 'FeatureCollection', features };
const targetBase = uniqueCacheDirs[1] || uniqueCacheDirs[0];
const targetDir = join(targetBase, countryCode);
if (!existsSync(targetDir)) {
mkdirSync(targetDir, { recursive: true });
}
const outCacheFile = join(targetDir, `${cacheKey}.json`);
try {
writeFileSync(outCacheFile, JSON.stringify(result));
} catch (e) {
console.warn(`[gpkg-reader] Failed to save JS cache: ${outCacheFile}`, e);
}
return result;
} else {
console.log(`[gpkg-reader] CPP file had no .features array: ${cppFile}`);
}
} catch (e) {
console.warn(`[gpkg-reader] Failed to read C++ cache from ${cppFile}:`, e);
}
} else {
console.log(`[gpkg-reader] MISS CPP fallback path: ${cppFile}`);
}
}
}
if (!ensureDb()) return null;
console.log(`[gpkg-reader] No cache found for ${gadmId}, falling back to SQLite GeoPackage`);
if (!ensureDb()) {
console.error(`[gpkg-reader] GeoPackage DB init failed for ${gadmId}, returning null`);
return null;
}
const db = sharedDb;
const tableName = sharedTableName;
// Determine the GID's own level from dot count: "ESP" = L0, "ESP.6_1" = L1
const dotCount = (gadmId.match(/\./g) || []).length;
const gidLevel = dotCount;
// (dotCount and gidLevel already defined above)
const level = contentLevel != null ? contentLevel : gidLevel;
// Columns to select
@@ -612,7 +628,6 @@ export async function getBoundaryFromGpkg(
if (!existsSync(targetDir)) {
mkdirSync(targetDir, { recursive: true });
}
console.log('GADM Writing External cache:', targetFile);
writeFileSync(targetFile, JSON.stringify(result));
} catch (e) {
console.warn(`[gpkg-reader] Failed to write cache for ${gadmId}:`, e);

View File

@@ -7,7 +7,7 @@
*/
import { getNames } from './names.js';
import { getBoundaryFromGpkg, type GadmCache } from './gpkg-reader.js';
import CONTINENT_MAP_JSON from '../data/gadm_continent.json' with { type: 'json' };
// import CONTINENT_MAP_JSON from '../data/gadm_continent.json' with { type: 'json' };
// ---------- types ----------

View File

@@ -171,13 +171,18 @@ export async function getBoundary(
resolution: number = 3
): Promise<GeoJSONCollection | { error: string }> {
console.log('getBoundary', gadmId, contentLevel, enrichOptions, resolution, cache);
const enrichKeySuffix = enrichOptions ? '_enriched' : '';
const keySuffix = `${contentLevel ?? 'auto'}_${gadmId}${enrichKeySuffix}`;
const key = getCacheKey(`boundary`, keySuffix);
// 1. Check if we already have the EXACT requested state cached
const cached = await readCache<GeoJSONCollection>(key, cache);
if (cached) return cached;
if (cached) {
console.log('getBoundary cache hit', key);
return cached;
}
// 2. Fetch the base geometry
let baseCollection: GeoJSONCollection | null = null;
@@ -185,14 +190,18 @@ export async function getBoundary(
// First try the far superior SQLite GeoPackage
const gpkgRes = await getBoundaryFromGpkg(gadmId, contentLevel, cache, resolution);
if (gpkgRes) {
console.log('getBoundary gpkgRes', gpkgRes);
baseCollection = gpkgRes;
} else {
// Fallback exactly as before to Parquet mode
const baseKey = getCacheKey(`boundary_${contentLevel ?? 'auto'}`, gadmId);
const baseCached = await readCache<GeoJSONCollection>(baseKey, cache);
if (baseCached) {
console.log('getBoundary baseCached', baseCached);
baseCollection = baseCached;
} else {
console.log('getBoundary baseCached miss', baseCached);
try {
const gdf = await getItems({ admin: [gadmId], contentLevel, cache });
if (gdf.features.length === 0) {
@@ -213,6 +222,7 @@ export async function getBoundary(
(f: any) => f.properties?.ghsPopulation !== undefined
);
if (enrichOptions && !alreadyEnriched && baseCollection && baseCollection.features) {
console.log('getBoundary enrichOptions', enrichOptions);
// Deep clone so we don't mutate an in-memory cached object accidentally
collectionToReturn = JSON.parse(JSON.stringify(baseCollection));