/*
 * mono/packages/search/lib/googlemaps.js
 * 2025-03-11 11:28:14 +01:00
 *
 * 455 lines
 * 21 KiB
 * JavaScript
 */

"use strict";
// TypeScript-emitted helper that drives a generator function as if it were an async function.
// An existing `this.__awaiter` is reused when present; otherwise the function below is used.
// `generator` is applied with `thisArg` / `_arguments`; `P` is the promise constructor (defaults to Promise).
// The returned promise resolves with the generator's return value and rejects with any error it throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a yielded value in P unless it already is an instance of P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a fulfilled value; an exception while resuming rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw the rejection reason into the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Settle when the generator is done, otherwise wait for the yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator and run it up to its first yield.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.each = exports.migrate = exports.googleMaps = exports.resolvePath = exports.parse = exports.searchGoogleMap = exports.searchVendorSA = exports.defaultSearchParamsMapsES = exports.defaultParamsGoogleES = exports.searchVendor = exports.yargsOptionsEach = exports.yargsOptions = exports.zodSchemaEach = exports.zodSchemaEachExtras = exports.zodSchema = exports.getStored = exports.store = exports.locationString = exports.home = exports.SearchQueriesES = void 0;
const path = require("path");
const url_1 = require("url");
const osr_commons_1 = require("@plastichub/osr-commons");
const osr_commons_2 = require("@plastichub/osr-commons");
const osr_commons_3 = require("@plastichub/osr-commons");
const lib_1 = require("@plastichub/osr-cli-commons/lib");
const zod_1 = require("zod");
const write_1 = require("@plastichub/fs/write");
const read_1 = require("@plastichub/fs/read");
const exists_1 = require("@plastichub/fs/exists");
const objects_1 = require("@plastichub/core/objects");
const primitives_1 = require("@plastichub/core/primitives");
const pMap = require("p-map");
const lib_2 = require("@plastichub/osr-cache/lib");
const osr_commons_4 = require("@plastichub/osr-commons");
const __1 = require("../");
const _1 = require("./");
const email_1 = require("./email");
const constants_1 = require("./constants");
const html_1 = require("./html");
const geo_1 = require("./geo");
const report_map_1 = require("../lib/report_map");
const geo_2 = require("./geo");
const osr_registry_1 = require("@plastichub/osr-registry");
// Namespace handed to get_cached_object / set_cached_object by searchGoogleMap.
const MODULE_NAME = 'osr-search';
// Suffix appended to every search query in this file; currently empty.
const queryExtras = '';
// Hostnames that searchVendor removes from its organic results.
const blUrls = ['bazar.preciousplastic.com'];
// Predefined search query strings, emitted by TypeScript as a string enum.
// The same object is published as exports.SearchQueriesES.
var SearchQueriesES;
(function (SearchQueriesES) {
SearchQueriesES["INJECTION"] = "inyecci\u00F3n de plastico";
})(SearchQueriesES || (exports.SearchQueriesES = SearchQueriesES = {}));
/**
 * Default search origin as a "latitude,longitude" string.
 *
 * @returns {string} The fixed coordinate pair used when no other location is given.
 */
const home = () => {
    const latitude = "41.6911354";
    const longitude = "2.1652746";
    return `${latitude},${longitude}`;
};
exports.home = home;
/**
 * Formats coordinates and a zoom level as "@<coords>,<zoom>z".
 *
 * @param {*} coords - Coordinates; interpolated into the string as-is.
 * @param {number} [zoom=13] - Zoom level placed before the trailing "z".
 * @returns {string} The formatted location string.
 */
const locationString = (coords, zoom = 13) => {
    const anchor = `@${coords}`;
    return `${anchor},${zoom}z`;
};
exports.locationString = locationString;
/**
 * Asynchronously obtains the registry store for a path and namespace.
 *
 * @param {string} storePath - Location passed to the registry's `store` function.
 * @param {string} [ns='osr-search'] - Namespace passed to the registry's `store` function.
 * @returns {Promise<*>} Resolves with whatever the registry's `store` function returns.
 */
const store = (storePath, ns = 'osr-search') => {
    const { store: openStore } = osr_registry_1;
    return __awaiter(void 0, void 0, void 0, function* () {
        return openStore(storePath, ns);
    });
};
exports.store = store;
/**
 * Looks up a single entry by title in the registry store.
 *
 * @param {string} title - Key handed to the store's `get`.
 * @param {string} storePath - Location passed to the registry's `store` function.
 * @param {string} [ns='osr-search'] - Namespace passed to the registry's `store` function.
 * @returns {Promise<*>} Resolves with the result of `get(title)`.
 */
const getStored = (title, storePath, ns = 'osr-search') => {
    const { store: openStore } = osr_registry_1;
    return __awaiter(void 0, void 0, void 0, function* () {
        const registry = openStore(storePath, ns);
        return registry.get(title);
    });
};
exports.getStored = getStored;
/**
 * Builds the zod schema describing the Google Maps search options
 * (schema description: 'IOptionsGoogleMaps').
 *
 * @returns {object} A fresh zod object schema on every call.
 */
const zodSchema = () => {
    const { z } = zod_1;
    const fields = {
        api_key: z.string().optional().describe('API Key'),
        cache: z.boolean().default(true),
        category: z.string().optional().default('category'),
        dst: z.string().default('${OSR_CUSTOMER_DRIVE}/campaign/maps/${FROM}/${CATEGORY}/${QUERY}-10.xls'),
        dump: z.string().optional(),
        engine: z.string().default('google_maps'),
        env_key: z.string().default('OSR-CONFIG'),
        filterCity: z.string().optional(),
        filterCountry: z.string().optional(),
        filterType: z.string().optional(),
        findEMail: z.boolean().default(false),
        geocode_key: z.string().optional(),
        google_domain: z.string().default('google.com'),
        headless: z.boolean().default(true).describe('Headless mode'),
        language: z.string().default('en'),
        limit: z.number().default(5),
        logLevel: z.string().default('info'),
        meta: z.boolean().default(true),
        searchCache: z.boolean().default(true).describe('Use search cache'),
        query: z.string().default('plastichub'),
        searchCoord: z.string().optional(),
        searchFrom: z.string().optional(),
        source: z.string().optional(),
        type: z.string().optional().default('search'),
        zoom: z.number().optional().default(13),
        index: z.string().default('${OSR_ROOT}/osr-directory/meta/index.json').describe('Index file'),
        store: z.string().default('${OSR_ROOT}/osr-directory/meta/index.db').describe('Index store'),
    };
    return z.object(fields, { description: 'IOptionsGoogleMaps' });
};
exports.zodSchema = zodSchema;
/**
 * Builds the zod schema for the additional options of the "each" command
 * (country, area, list, cwd, env, profile, migrate and logging).
 *
 * @returns {object} A fresh zod object schema on every call.
 */
const zodSchemaEachExtras = () => {
    const { z } = zod_1;
    const fields = {
        logLevel: z.string().default('info'),
        log: z.string().optional(),
        country: z.string().describe('The country to search in, variable ${COUNTRY}'),
        area: z.string().describe('The city to search in, variable ${AREA}'),
        list: z.string().describe('List of items to process, FILE|GLOB|AI-Query, provided as ${TOWN}'),
        cwd: z.string().optional().default('./').describe('the current working directory to use, otherwise . is being assumed'),
        env: z.string().default(''),
        profile: z.string().default('${OSR_ROOT}/osr-templates/osrl/.osrl.json'),
        migrate: z.boolean().default(false),
    };
    return z.object(fields);
};
exports.zodSchemaEachExtras = zodSchemaEachExtras;
/**
 * Combines the base search schema with the "each" extras
 * (schema description: 'IOptionsGoogleMapsEach').
 *
 * @returns {object} The merged zod schema.
 */
const zodSchemaEach = () => {
    const base = (0, exports.zodSchema)();
    const extras = (0, exports.zodSchemaEachExtras)();
    return base.merge(extras).describe('IOptionsGoogleMapsEach');
};
exports.zodSchemaEach = zodSchemaEach;
/**
 * Registers the base search options on a yargs instance via `toYargs`.
 *
 * @param {object} yargs - The yargs instance to extend.
 * @returns {*} Whatever `toYargs` returns.
 */
const yargsOptions = (yargs) => {
    const schema = (0, exports.zodSchema)();
    return (0, osr_commons_3.toYargs)(yargs, schema);
};
exports.yargsOptions = yargsOptions;
/**
 * Registers the merged "each" options on a yargs instance via `toYargs`.
 *
 * @param {object} yargs - The yargs instance to extend.
 * @returns {*} Whatever `toYargs` returns.
 */
const yargsOptionsEach = (yargs) => {
    const schema = (0, exports.zodSchemaEach)();
    return (0, osr_commons_3.toYargs)(yargs, schema);
};
exports.yargsOptionsEach = yargsOptionsEach;
/**
 * Runs a ScaleSERP search for `name` and returns the links of all organic
 * results whose hostname is not on the blocklist (`blUrls`).
 *
 * @param {string} name - Search term; `queryExtras` is appended to it.
 * @param {string} [dst] - When truthy, the resulting links are written to this path.
 * @param {object} opts - Options; only `opts.api_key` is read here.
 * @returns {Promise<Array>} The `link` values of the remaining organic results.
 */
const searchVendor = (name, dst, opts) => __awaiter(void 0, void 0, void 0, function* () {
    const response = yield _1.SearchProviders.scaleserp({
        api_key: opts.api_key,
        q: name + queryExtras
    });
    // A response without organic results is treated as "no hits" instead of crashing.
    const organic = (response && response.organic_results) || [];
    const urls = organic
        .filter((hit) => {
            try {
                return !blUrls.includes(new url_1.URL(hit.link).hostname);
            }
            catch (e) {
                // Missing or unparsable link: it cannot be checked against the blocklist, so skip it.
                return false;
            }
        })
        .map((hit) => hit.link);
    dst && (0, write_1.sync)(dst, urls);
    return urls;
});
exports.searchVendor = searchVendor;
/**
 * Builds Google search parameters from the module defaults in `constants`.
 *
 * @param {string} query - Value for the `q` parameter.
 * @param {object} [mixin] - Extra parameters; its keys override the defaults.
 * @returns {object} The merged parameter object.
 */
const defaultParamsGoogleES = (query, mixin) => {
    const defaults = {
        location: constants_1.defaultFromLocation,
        hl: constants_1.defaultLanguage,
        gl: constants_1.defaultLanguage,
        google_domain: constants_1.defaultGoogleDomain,
        q: query
    };
    return Object.assign(defaults, mixin);
};
exports.defaultParamsGoogleES = defaultParamsGoogleES;
/**
 * Builds map search parameters centred on `home()` from the module defaults.
 *
 * @param {string} query - Value for the `q` parameter.
 * @param {number} [zoom] - Zoom level forwarded to `locationString`.
 * @param {object} [mixin={}] - Extra parameters; its keys override the defaults.
 * @returns {object} The merged parameter object.
 */
const defaultSearchParamsMapsES = (query, zoom, mixin = {}) => {
    const center = (0, exports.home)();
    const params = {
        "engine": constants_1.defaultEngine,
        "type": "search",
        "q": query,
        "ll": (0, exports.locationString)(center, zoom),
        "google_domain": constants_1.defaultGoogleDomain,
        "hl": constants_1.defaultLanguage
    };
    return Object.assign(params, mixin);
};
exports.defaultSearchParamsMapsES = defaultSearchParamsMapsES;
/**
 * Runs a "google" engine search through the SerpApi provider.
 *
 * @param {string} query - Search term; `queryExtras` is appended to it.
 * @param {string} location - Value for the `location` parameter.
 * @param {string} key - Value for the `api_key` parameter.
 * @param {object} [opts] - Extra parameters; they override the built-in defaults but not `q`.
 * @returns {Promise<*>} Resolves with the provider's response.
 */
const searchVendorSA = (query, location, key, opts) => __awaiter(void 0, void 0, void 0, function* () {
    const baseParams = { api_key: key, location: location, hl: "en", gl: "us", google_domain: "google.com" };
    const googleParams = Object.assign(baseParams, opts, { q: query + queryExtras });
    const response = yield _1.SearchProviders.serpApi("google", googleParams);
    return response;
});
exports.searchVendorSA = searchVendorSA;
/**
 * Searches through the SerpApi provider with `opts.engine`, collects the
 * local/place results page by page, then enriches and filters them.
 *
 * Steps:
 *  1. Load a cached result list (when `opts.searchCache` and OSR_CACHE() are on)
 *     or page through the provider until `opts.limit` hits are collected.
 *  2. Attach `position` and geo data to each hit (geo taken from the index file
 *     when present, otherwise from `reverse`).
 *  3. Apply the optional city / country / continent / type filters, drop hits
 *     without `gps_coordinates` and hits the index already holds with geo and meta.
 *  4. Optionally attach meta data (`opts.meta`) and e-mail (`opts.findEMail`).
 *
 * @param {string} query - Search term; `queryExtras` is appended to it.
 * @param {string} key - Value for the `api_key` parameter.
 * @param {object} opts - Search options; spread into the provider parameters.
 * @returns {Promise<Array>} The enriched and filtered hits, at most `opts.limit`.
 */
const searchGoogleMap = (query, key, opts) => __awaiter(void 0, void 0, void 0, function* () {
    const googleParams = Object.assign(Object.assign({}, opts), { api_key: key, q: query + queryExtras, ll: opts.searchCoord });
    const params = googleParams;
    const index = opts.index ? (0, read_1.sync)(opts.index, 'json') || {} : {};
    const cache_key = {
        engine: params.engine,
        type: params.type,
        q: params.q,
        google_domain: params.google_domain,
        hl: params.hl,
        zoom: params.zoom,
        searchFrom: params.searchFrom,
        limit: params.limit
    };
    const useCache = Boolean(opts.searchCache && (0, osr_commons_4.OSR_CACHE)());
    let cached;
    if (useCache) {
        cached = yield (0, lib_2.get_cached_object)(cache_key, MODULE_NAME);
    }
    let results = [];
    if ((0, primitives_1.isArray)(cached)) {
        // The cache is written with the collected result list (see set_cached_object below),
        // so an array hit is the result list itself, not a provider page.
        results = cached;
    }
    else {
        let pageIdx = 0;
        const tagPage = (items) => items.forEach((r) => {
            r.page = pageIdx;
        });
        // Pages expose an optional `next()`; without it there is no further page.
        const nextPage = (current) => (current.next == null ? undefined : current.next());
        let page = cached || (yield _1.SearchProviders.serpApi(googleParams.engine, Object.assign({}, googleParams)));
        while (page && page.local_results) {
            tagPage(page.local_results);
            results.push(...page.local_results);
            if (results.length >= opts.limit)
                break;
            pageIdx++;
            page = yield nextPage(page);
        }
        // `page` is undefined here once pagination ran out, hence the guard in the loop condition.
        while (page && page.place_results) {
            // A single place may come back as a bare object; normalise every page, not just the first.
            const places = (0, primitives_1.isArray)(page.place_results) ? page.place_results : [page.place_results];
            tagPage(places);
            results.push(...places);
            if (results.length >= opts.limit)
                break;
            pageIdx++;
            page = yield nextPage(page);
        }
        if (useCache) {
            (0, lib_2.set_cached_object)(cache_key, MODULE_NAME, results);
        }
    }
    // NOTE(review): idx counts across all pages, so position is page * PAGE_SIZE plus the
    // running index rather than the offset within the page - confirm this is intended.
    let idx = 0;
    yield pMap(results, (entry) => __awaiter(void 0, void 0, void 0, function* () {
        idx++;
        entry.position = entry.page * constants_1.PAGE_SIZE + idx;
        try {
            if (index[entry.title] && index[entry.title].geo) {
                entry.geo = index[entry.title].geo;
                return;
            }
            // Await inside the try so a rejected lookup reaches the catch below.
            return yield (0, geo_1.reverse)(entry, opts);
        }
        catch (e) {
            __1.logger.error(`Error reverse geocoding ${entry.title}`);
            entry.geo = geo_1.REVERSE_DEFAULT;
        }
    }), { concurrency: 3 });
    // Case-insensitive comparison that treats a missing geo field as "no match".
    const geoMatches = (entry, field, wanted) => {
        const value = entry.geo && entry.geo[field];
        return typeof value === 'string' && value.toLowerCase() === wanted.toLowerCase();
    };
    if (opts.filterCity) {
        results = results.filter((r) => geoMatches(r, 'city', opts.filterCity));
    }
    if (opts.filterCountry) {
        results = results.filter((r) => geoMatches(r, 'countryName', opts.filterCountry));
    }
    if (opts.filterContinent) {
        results = results.filter((r) => geoMatches(r, 'continent', opts.filterContinent));
    }
    if (opts.filterType) {
        // NOTE(review): compares the hit's `filterType` property; verify the hits really carry
        // that field (as opposed to e.g. `type`).
        results = results.filter((r) => r.filterType === opts.filterType);
    }
    results = results.filter((r) => r.gps_coordinates);
    const beforeCached = results.length;
    // Keep only hits the index does not yet hold with both geo and meta data.
    results = results.filter((r) => {
        return index[r.title] == null || !index[r.title].geo || !index[r.title].meta;
    });
    __1.logger.info(`search ${query} with ${params.ll} / ${params.searchFrom} : ${results.length} items | ${beforeCached} before cache`);
    results = results.slice(0, opts.limit);
    if (opts.meta) {
        yield pMap(results, (entry) => __awaiter(void 0, void 0, void 0, function* () {
            if (entry.meta || !entry.website || entry.rejected) {
                return;
            }
            try {
                if (index[entry.title] && index[entry.title].meta) {
                    entry.meta = index[entry.title].meta;
                    return;
                }
                // Await inside the try so one failing site does not abort the whole search.
                return yield (0, html_1.meta)(entry, opts);
            }
            catch (e) {
                __1.logger.error(`Error retrieving meta data ${entry.title}`);
            }
        }), { concurrency: 1 });
    }
    if (opts.findEMail && opts.meta) {
        yield pMap(results, (entry) => __awaiter(void 0, void 0, void 0, function* () {
            if (index[entry.title] && index[entry.title].email) {
                entry.email = index[entry.title].email;
                return;
            }
            if (entry.meta && entry.website && !entry.email) {
                try {
                    return yield (0, email_1.findEMail)(constants_1.SEARCH_AI_PROMPTS.GET_EMAIL, entry.website, opts, entry);
                }
                catch (e) {
                    __1.logger.error(`Error retrieving EMail data ${entry.title}`);
                }
            }
        }), { concurrency: 1 });
    }
    return results;
});
exports.searchGoogleMap = searchGoogleMap;
/**
 * Validates and completes the command options.
 *
 * Merges the map search defaults, the given arguments and the keys from the
 * configuration (`CONFIG_DEFAULT(argv.env_key)`), resolves `source`, `index`
 * and `store` to absolute paths, loads a `source` file as JSON and, when no
 * source is given, builds one of the form `{ [category]: [query] }`.
 *
 * @param {object} argv - Raw options (CLI arguments or programmatic options).
 * @returns {object|undefined} The completed options, or undefined (after logging)
 *   when the configuration, API key, source file or query is missing.
 */
const parse = (argv) => {
    const args = argv;
    __1.logger.setSettings({ minLevel: args.logLevel || 'info' });
    const config = (0, osr_commons_1.CONFIG_DEFAULT)(args.env_key);
    if (!config) {
        __1.logger.warn('No config found!');
        return;
    }
    // The key may come from the options instead of the config, so only give up when both
    // are missing. Config sections are read defensively because any of them may be absent.
    const serpapiKey = config.serpapi && config.serpapi.key;
    if (!argv.api_key && !serpapiKey) {
        __1.logger.warn('No serpapi key found in config!');
        return;
    }
    const defaults = (0, exports.defaultSearchParamsMapsES)(argv.query, argv.zoom);
    const opts = Object.assign(Object.assign(Object.assign({ query: argv.query }, defaults), argv), {
        api_key: argv.api_key || serpapiKey,
        geocode_key: argv.geocode_key || (config.geocoder && config.geocoder.key),
        openai: config.openai,
        headless: argv.headless ? true : false,
        bigdata: { key: config.bigdata && config.bigdata.key }
    });
    if (opts.source && (0, primitives_1.isString)(opts.source)) {
        opts.source = path.resolve((0, osr_commons_2.resolve)(args.source, false));
    }
    if ((0, primitives_1.isString)(opts.source)) {
        if ((0, exists_1.sync)(opts.source)) {
            opts.source = (0, read_1.sync)(opts.source, 'json');
        }
        else {
            __1.logger.error(`Source file ${args.source} not found : ${opts.source}`);
            return;
        }
    }
    if (!opts.source && !opts.query) {
        __1.logger.warn(`Invalid source and query`);
        return;
    }
    if (opts.index) {
        opts.index = path.resolve((0, osr_commons_2.resolve)(args.index, false));
    }
    if (opts.store) {
        opts.store = path.resolve((0, osr_commons_2.resolve)(args.store, false));
    }
    if (!opts.source) {
        // No source given: search the single query under the configured category.
        opts.source = {};
        opts.source[opts.category] = [opts.query];
    }
    if (!opts.api_key) {
        __1.logger.error('No Serpapi key found in config or options!');
        return;
    }
    if (!opts.query) {
        __1.logger.error('No query specified');
        return;
    }
    return opts;
};
exports.parse = parse;
/**
 * Expands the path template `str` with search-specific variables and returns
 * an absolute path.
 *
 * Variables provided: QUERY, FROM (the comma-separated `opts.searchFrom` joined
 * with '/'), ENGINE, DOMAIN, LANG, COUNTRY, AREA and CATEGORY ('unknown' when
 * empty); entries of `opts.variables` override them.
 *
 * @param {string} str - Path template.
 * @param {string} query - Value for QUERY.
 * @param {string} category - Value for CATEGORY.
 * @param {object} opts - Options supplying the remaining variables.
 * @returns {string} The resolved absolute path.
 */
const resolvePath = (str, query, category, opts) => {
    // searchFrom is optional in the options schema; a missing value yields an empty FROM segment
    // instead of a TypeError.
    const from = (opts.searchFrom || '').split(',').map((s) => s.trim()).join('/');
    const variables = Object.assign({
        QUERY: query,
        FROM: from,
        ENGINE: opts.engine,
        DOMAIN: opts.google_domain,
        LANG: opts.language,
        COUNTRY: opts.country,
        AREA: opts.area,
        CATEGORY: category || 'unknown'
    }, opts.variables || {});
    return path.resolve((0, osr_commons_2.resolve)(str, false, variables));
};
exports.resolvePath = resolvePath;
/**
 * Runs every query of `opts.source` and writes the results to disk.
 *
 * After `parse`, the `searchFrom` location is geocoded into `opts.searchCoord`
 * when a geocode key is available and no coordinate was given. Each query of
 * each category is then searched sequentially; per query a `<dst>.json` file,
 * a report and an `_options.json` file are written, and an existing
 * `<dst>.json` is reused unless `opts.cache` is false. Finally all hits are
 * appended to the aggregated "all" file and new titles are added to the index file.
 *
 * @param {object} opts - Raw options; validated with `parse`.
 * @returns {Promise<Array|undefined>} Per category, an array with one entry per query
 *   holding that query's hits (undefined for a failed query); undefined when the
 *   options are invalid.
 */
const googleMaps = (opts) => __awaiter(void 0, void 0, void 0, function* () {
    opts = (0, exports.parse)(opts);
    if (!opts) {
        __1.logger.error('Invalid options', opts);
        return;
    }
    try {
        const searchFrom = (0, osr_commons_2.substitute)(false, opts.searchFrom, opts.variables);
        if (searchFrom && opts.geocode_key && !opts.searchCoord) {
            const coords = yield (0, geo_2.geocode_forward)(searchFrom, opts.geocode_key);
            if (coords) {
                opts.searchCoord = (0, exports.locationString)(coords, opts.zoom);
            }
            else {
                __1.logger.error('Error geocoding', searchFrom);
            }
        }
    }
    catch (error) {
        // Geocoding is best effort: the search continues without a coordinate.
        __1.logger.error('Error geocoding', error, error.stack);
    }
    // Accumulates the hits of all queries for the aggregated file and the index.
    let ret = [];
    const search = (query, category, opts) => __awaiter(void 0, void 0, void 0, function* () {
        // Work on a copy so the per-query dst does not leak into the shared options.
        opts = (0, objects_1.clone)(opts);
        opts.dst = (0, exports.resolvePath)(path.join(opts.cwd || '', opts.dst || ''), query, category, opts);
        const cachedPath = opts.dst + '.json';
        if (opts.cache !== false && (0, exists_1.sync)(cachedPath)) {
            const cached = (0, read_1.sync)(cachedPath, 'json') || [];
            __1.logger.debug(`Searching ${opts.query} with ${opts.searchFrom} :: returning cached ${cached.length}`);
            ret = [...ret, ...cached];
            return cached;
        }
        try {
            __1.logger.debug(`Searching ${opts.query} with ${opts.searchFrom} :: 3`);
            const sr = yield (0, exports.searchGoogleMap)(query, opts.api_key, Object.assign({}, opts));
            if (sr && sr.length && opts.dst) {
                __1.logger.debug('Writing', opts.dst);
                (0, write_1.sync)(cachedPath, sr);
                (0, report_map_1.writeReport)(sr, opts.dst, opts);
                const parts = path.parse(opts.dst);
                (0, write_1.sync)(path.join(parts.dir, parts.name + '_options.json'), (0, _1.cleanOptions)(opts));
            }
            ret = [...ret, ...sr];
            __1.logger.debug(`Searching ${opts.query} with ${opts.searchFrom} :: 4`);
            // Return this query's hits only, like the cached branch above; returning the
            // accumulator would repeat earlier queries' hits in every later entry.
            return sr;
        }
        catch (error) {
            __1.logger.error('Error searching GoogleMaps : ' + error.message, error, error.stack);
        }
    });
    // Categories and their queries are processed strictly one after another.
    const all = yield pMap(Object.keys(opts.source), (k) => {
        return pMap(opts.source[k], (t) => {
            return search(t, k, opts);
        }, {
            concurrency: 1
        });
    }, {
        concurrency: 1
    });
    if (opts.dst) {
        opts.dst = (0, exports.resolvePath)(opts.dst, 'all', 'all', opts);
        if ((0, exists_1.sync)(opts.dst + '.json')) {
            // Keep what earlier runs collected and append this run's hits.
            const last = ((0, read_1.sync)(opts.dst + '.json', 'json') || []);
            ret = [...last, ...ret];
        }
        (0, write_1.sync)(opts.dst + '.json', ret);
        (0, report_map_1.writeReport)(ret, opts.dst, opts);
    }
    if (opts.index) {
        const index = (0, read_1.sync)(opts.index, 'json') || {};
        // Existing index entries win; only unknown titles are added.
        ret.forEach((r) => {
            if (!index[r.title]) {
                index[r.title] = r;
            }
        });
        (0, write_1.sync)(opts.index, index);
    }
    return all;
});
exports.googleMaps = googleMaps;
/**
 * Migration of the JSON index into the registry store.
 *
 * The actual write into the store is currently disabled, so this only reads
 * the index, requests the store and maps every index entry to undefined.
 *
 * @param {object} opts - Needs `store` and `index`; logs an error and returns otherwise.
 * @returns {Promise<Array|undefined>} One undefined per index entry, or undefined
 *   when `store` or `index` is missing.
 */
const migrate = (opts) => __awaiter(void 0, void 0, void 0, function* () {
    const { store: storePath, index: indexPath } = opts;
    if (!storePath) {
        __1.logger.error('No store provided');
        return;
    }
    if (!indexPath) {
        __1.logger.error('No index provided');
        return;
    }
    let entries = (0, read_1.sync)(indexPath, 'json') || {};
    const isKeyedObject = !(0, primitives_1.isArray)(entries) && (0, primitives_1.isObject)(entries);
    if (isKeyedObject) {
        // Flatten a keyed index object into the list of its values.
        const keyed = entries;
        entries = Object.keys(keyed).map((key) => keyed[key]);
    }
    // The store is requested but not used yet; the per-entry write below is still a stub.
    (0, exports.store)(storePath, 'osr-search');
    const noop = () => __awaiter(void 0, void 0, void 0, function* () { });
    const ret = yield pMap(entries, () => noop());
    return ret;
});
exports.migrate = migrate;
/**
 * Runs `googleMaps` once per item of a list, substituting the item into the
 * query, destination and search location.
 *
 * The list comes from `parseCustomUrl` (when it yields an array), from a
 * `.json` file, or from a glob relative to `opts.cwd`. The item list is
 * written to `<cwd>/list.json`. For every item the variables KEY and TOWN are
 * set to the item, merged with the profile variables (which take precedence),
 * and applied to `query`, `dst` and `searchFrom`. Items are processed one at a time.
 *
 * @param {object} opts - Options as described by `zodSchemaEach`.
 * @returns {Promise<Array|undefined>} The `googleMaps` result per item (or the
 *   `migrate` result when `opts.migrate` is set); undefined when the options
 *   or the list are invalid.
 */
const each = (opts) => __awaiter(void 0, void 0, void 0, function* () {
    __1.logger.setSettings({ minLevel: opts.logLevel });
    let items = [];
    const listPath = path.resolve((0, osr_commons_2.resolve)(opts.list));
    const profile = (0, osr_commons_1.parseProfile)(opts.profile, {
        variables: Object.assign({ AREA: opts.area, COUNTRY: opts.country }, osr_commons_1.DEFAULT_ROOTS), includes: [], env: {}
    }, { env: opts.env });
    // Remember whether the caller supplied a source before parse() synthesises one
    // from the (still unsubstituted) query.
    const hasExplicitSource = !!opts.source;
    opts = (0, exports.parse)(opts);
    if (!opts) {
        __1.logger.error('Invalid options', opts);
        return;
    }
    if (opts.migrate) {
        return (0, exports.migrate)(opts);
    }
    const list = yield (0, lib_1.parseCustomUrl)((0, osr_commons_2.substitute)(false, opts.list, profile.variables));
    if ((0, primitives_1.isArray)(list)) {
        items = list;
    }
    else if ((0, exists_1.sync)(listPath) && (0, osr_commons_2.isFile)(listPath) && path.parse(listPath).ext === '.json') {
        items = (0, read_1.sync)(listPath, 'json') || [];
    }
    else if ((0, osr_commons_1.pathInfo)(opts.list).IS_GLOB) {
        items = (0, osr_commons_1.filesEx)(path.resolve((0, osr_commons_2.resolve)(opts.cwd)) || './', opts.list) || [];
    }
    if (!items || items.length === 0) {
        __1.logger.error('osr-cli::each: invalid list or empty list');
        return;
    }
    items = items.filter((item) => !!item);
    __1.logger.debug(`${items.length} items`);
    (0, write_1.sync)(path.resolve((0, osr_commons_2.resolve)(opts.cwd), 'list.json'), items);
    const all = yield pMap(items, (KEY) => {
        const variables = Object.assign({ KEY, TOWN: KEY }, profile.variables);
        const googleOpts = Object.assign(Object.assign({}, opts), {
            query: (0, osr_commons_2.substitute)(false, opts.query, variables),
            dst: (0, osr_commons_2.substitute)(false, opts.dst, variables),
            searchFrom: (0, osr_commons_2.substitute)(false, opts.searchFrom, variables),
            // A synthesised source holds the raw query template; drop it so googleMaps()
            // rebuilds it from this item's substituted query. A caller-supplied source is kept.
            source: hasExplicitSource ? opts.source : undefined,
            variables
        });
        return (0, exports.googleMaps)(googleOpts);
    }, { concurrency: 1 });
    opts.log && (0, write_1.sync)(path.resolve((0, osr_commons_2.resolve)(opts.log)), all);
    return all;
});
exports.each = each;
//# sourceMappingURL=googlemaps.js.map