search | registry esm

This commit is contained in:
babayaga 2025-11-21 23:29:34 +01:00
parent 3daa35b99f
commit e37cd8be67
13 changed files with 276 additions and 774 deletions

View File

@ -1,254 +1,14 @@
import * as CLI from 'yargs';
export type ParsedURL = {
scheme: string;
host?: string;
path?: string;
query?: Record<string, string>;
fragment?: string;
};
export declare const escapeFirstUrlSegment: (url: string) => string;
export declare const handleFs: (path: string) => Promise<string | object>;
export declare const schemeHandlers: Record<string, (arg1: string, arg2?: URLSearchParams) => Promise<string | object>>;
export declare const parseCustomUrl: (url: string) => Promise<string | object>;
import { z } from 'zod';
import { zodSchema, zodSchemaEach, yargsOptions, yargsOptionsEach, IOptionsGoogleMaps, IOptionsGoogleMapsEach } from './googlemaps-zod.js';
export { zodSchema, zodSchemaEach, yargsOptions, yargsOptionsEach, IOptionsGoogleMaps, IOptionsGoogleMapsEach };
import type { GoogleParameters } from "serpapi";
import { IScaleserpSearch } from './types.js';
import { IOptionsGoogleMaps, IOptionsGoogleMapsEach } from './types-googlemaps.js';
import { LocalResult } from './map_types.js';
export declare enum SearchQueriesES {
INJECTION = "inyecci\u00F3n de plastico"
}
export declare const home: () => string;
export declare const locationString: (coords: string, zoom?: number) => string;
export declare const store: (storePath: string, ns?: string) => Promise<any>;
export declare const getStored: (title: string, storePath: string, ns?: string) => Promise<any>;
export declare const zodSchema: () => z.ZodObject<{
api_key: z.ZodOptional<z.ZodString>;
cache: z.ZodDefault<z.ZodBoolean>;
category: z.ZodDefault<z.ZodOptional<z.ZodString>>;
dst: z.ZodDefault<z.ZodString>;
dump: z.ZodOptional<z.ZodString>;
engine: z.ZodDefault<z.ZodString>;
env_key: z.ZodDefault<z.ZodString>;
filterCity: z.ZodOptional<z.ZodString>;
filterCountry: z.ZodOptional<z.ZodString>;
filterType: z.ZodOptional<z.ZodString>;
findEMail: z.ZodDefault<z.ZodBoolean>;
geocode_key: z.ZodOptional<z.ZodString>;
google_domain: z.ZodDefault<z.ZodString>;
headless: z.ZodDefault<z.ZodBoolean>;
language: z.ZodDefault<z.ZodString>;
limit: z.ZodDefault<z.ZodNumber>;
logLevel: z.ZodDefault<z.ZodString>;
meta: z.ZodDefault<z.ZodBoolean>;
searchCache: z.ZodDefault<z.ZodBoolean>;
query: z.ZodDefault<z.ZodString>;
searchCoord: z.ZodOptional<z.ZodString>;
searchFrom: z.ZodDefault<z.ZodOptional<z.ZodString>>;
source: z.ZodOptional<z.ZodString>;
type: z.ZodDefault<z.ZodOptional<z.ZodString>>;
zoom: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
index: z.ZodDefault<z.ZodString>;
store: z.ZodDefault<z.ZodString>;
}, "strip", z.ZodTypeAny, {
api_key?: string;
cache?: boolean;
category?: string;
dst?: string;
dump?: string;
engine?: string;
env_key?: string;
filterCity?: string;
filterCountry?: string;
filterType?: string;
findEMail?: boolean;
geocode_key?: string;
google_domain?: string;
headless?: boolean;
language?: string;
limit?: number;
logLevel?: string;
meta?: boolean;
searchCache?: boolean;
query?: string;
searchCoord?: string;
searchFrom?: string;
source?: string;
type?: string;
zoom?: number;
index?: string;
store?: string;
}, {
api_key?: string;
cache?: boolean;
category?: string;
dst?: string;
dump?: string;
engine?: string;
env_key?: string;
filterCity?: string;
filterCountry?: string;
filterType?: string;
findEMail?: boolean;
geocode_key?: string;
google_domain?: string;
headless?: boolean;
language?: string;
limit?: number;
logLevel?: string;
meta?: boolean;
searchCache?: boolean;
query?: string;
searchCoord?: string;
searchFrom?: string;
source?: string;
type?: string;
zoom?: number;
index?: string;
store?: string;
}>;
export declare const zodSchemaEachExtras: () => z.ZodObject<{
logLevel: z.ZodDefault<z.ZodString>;
log: z.ZodOptional<z.ZodString>;
country: z.ZodString;
area: z.ZodString;
list: z.ZodString;
cwd: z.ZodDefault<z.ZodOptional<z.ZodString>>;
env: z.ZodDefault<z.ZodString>;
profile: z.ZodDefault<z.ZodString>;
migrate: z.ZodDefault<z.ZodBoolean>;
}, "strip", z.ZodTypeAny, {
logLevel?: string;
log?: string;
country?: string;
area?: string;
list?: string;
cwd?: string;
env?: string;
profile?: string;
migrate?: boolean;
}, {
logLevel?: string;
log?: string;
country?: string;
area?: string;
list?: string;
cwd?: string;
env?: string;
profile?: string;
migrate?: boolean;
}>;
export declare const zodSchemaEach: () => z.ZodObject<{
api_key: z.ZodOptional<z.ZodString>;
cache: z.ZodDefault<z.ZodBoolean>;
category: z.ZodDefault<z.ZodOptional<z.ZodString>>;
dst: z.ZodDefault<z.ZodString>;
dump: z.ZodOptional<z.ZodString>;
engine: z.ZodDefault<z.ZodString>;
env_key: z.ZodDefault<z.ZodString>;
filterCity: z.ZodOptional<z.ZodString>;
filterCountry: z.ZodOptional<z.ZodString>;
filterType: z.ZodOptional<z.ZodString>;
findEMail: z.ZodDefault<z.ZodBoolean>;
geocode_key: z.ZodOptional<z.ZodString>;
google_domain: z.ZodDefault<z.ZodString>;
headless: z.ZodDefault<z.ZodBoolean>;
language: z.ZodDefault<z.ZodString>;
limit: z.ZodDefault<z.ZodNumber>;
meta: z.ZodDefault<z.ZodBoolean>;
searchCache: z.ZodDefault<z.ZodBoolean>;
query: z.ZodDefault<z.ZodString>;
searchCoord: z.ZodOptional<z.ZodString>;
searchFrom: z.ZodDefault<z.ZodOptional<z.ZodString>>;
source: z.ZodOptional<z.ZodString>;
type: z.ZodDefault<z.ZodOptional<z.ZodString>>;
zoom: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
index: z.ZodDefault<z.ZodString>;
store: z.ZodDefault<z.ZodString>;
} & {
logLevel: z.ZodDefault<z.ZodString>;
log: z.ZodOptional<z.ZodString>;
country: z.ZodString;
area: z.ZodString;
list: z.ZodString;
cwd: z.ZodDefault<z.ZodOptional<z.ZodString>>;
env: z.ZodDefault<z.ZodString>;
profile: z.ZodDefault<z.ZodString>;
migrate: z.ZodDefault<z.ZodBoolean>;
}, "strip", z.ZodTypeAny, {
api_key?: string;
cache?: boolean;
category?: string;
dst?: string;
dump?: string;
engine?: string;
env_key?: string;
filterCity?: string;
filterCountry?: string;
filterType?: string;
findEMail?: boolean;
geocode_key?: string;
google_domain?: string;
headless?: boolean;
language?: string;
limit?: number;
logLevel?: string;
meta?: boolean;
searchCache?: boolean;
query?: string;
searchCoord?: string;
searchFrom?: string;
source?: string;
type?: string;
zoom?: number;
index?: string;
store?: string;
log?: string;
country?: string;
area?: string;
list?: string;
cwd?: string;
env?: string;
profile?: string;
migrate?: boolean;
}, {
api_key?: string;
cache?: boolean;
category?: string;
dst?: string;
dump?: string;
engine?: string;
env_key?: string;
filterCity?: string;
filterCountry?: string;
filterType?: string;
findEMail?: boolean;
geocode_key?: string;
google_domain?: string;
headless?: boolean;
language?: string;
limit?: number;
logLevel?: string;
meta?: boolean;
searchCache?: boolean;
query?: string;
searchCoord?: string;
searchFrom?: string;
source?: string;
type?: string;
zoom?: number;
index?: string;
store?: string;
log?: string;
country?: string;
area?: string;
list?: string;
cwd?: string;
env?: string;
profile?: string;
migrate?: boolean;
}>;
export declare const yargsOptions: (yargs: CLI.Argv) => CLI.Argv;
export declare const yargsOptionsEach: (yargs: CLI.Argv) => CLI.Argv;
export declare const searchVendor: (name: string, dst: string, opts: IScaleserpSearch) => Promise<import("./types.js").OrganicResult[]>;
export declare const defaultParamsGoogleES: (query: any, mixin: any) => any;
export declare const defaultSearchParamsMapsES: (query: any, zoom: any, mixin?: {}) => {
@ -260,9 +20,8 @@ export declare const defaultSearchParamsMapsES: (query: any, zoom: any, mixin?:
hl: string;
};
export declare const searchVendorSA: (query: string, location: string, key: string, opts: GoogleParameters) => Promise<import("serpapi").BaseResponse<GoogleParameters>>;
export declare const searchGoogleMap: (query: string, key: string, opts: any) => Promise<any[]>;
export declare const parse: (argv: any) => any;
export declare const resolvePath: (str: string, query: any, category: any, opts: any) => string;
export declare const searchGoogleMap: (query: string, key: string, opts: IOptionsGoogleMaps) => Promise<LocalResult[]>;
export declare const parse: (argv: IOptionsGoogleMaps) => IOptionsGoogleMaps;
export declare const googleMaps: (opts: IOptionsGoogleMaps) => Promise<any[][]>;
export declare const migrate: (opts: IOptionsGoogleMapsEach) => Promise<void[]>;
export declare const each: (opts: IOptionsGoogleMapsEach) => Promise<any[]>;

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,5 @@
export * from './types.js';
export * from './googlemaps.js';
export * from './types-googlemaps.js';
import { getJson as searchSerpAPI } from "serpapi";
export declare const SearchProviders: {
scaleserp: (params: any) => Promise<import("./types.js").IScaleserpResponse>;

View File

@ -1,6 +1,5 @@
export * from './types.js';
export * from './googlemaps.js';
export * from './types-googlemaps.js';
import { generate_interfaces } from '@polymech/commons';
import { getJson as searchSerpAPI } from "serpapi";
import { search as searchScaleserp } from './scalesep.js';
@ -22,4 +21,4 @@ export const types = () => generate_interfaces([
zodSchemaGoogleMaps(),
zodSchemaEach(),
], 'src/lib/types-googlemaps.ts');
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiaW5kZXguanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi9zcmMvbGliL2luZGV4LnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUFBLGNBQWMsWUFBWSxDQUFBO0FBQzFCLGNBQWMsaUJBQWlCLENBQUE7QUFDL0IsY0FBYyx1QkFBdUIsQ0FBQTtBQUNyQyxPQUFPLEVBQUUsbUJBQW1CLEVBQUUsTUFBTSxtQkFBbUIsQ0FBQTtBQUN2RCxPQUFPLEVBQUUsT0FBTyxJQUFJLGFBQWEsRUFBRSxNQUFNLFNBQVMsQ0FBQTtBQUNsRCxPQUFPLEVBQUUsTUFBTSxJQUFJLGVBQWUsRUFBRSxNQUFNLGVBQWUsQ0FBQTtBQUN6RCxPQUFPLEVBQUUsU0FBUyxJQUFJLG1CQUFtQixFQUFFLGFBQWEsRUFBRSxNQUFNLGlCQUFpQixDQUFBO0FBRWpGLE1BQU0sQ0FBQyxNQUFNLGVBQWUsR0FBRztJQUMzQixTQUFTLEVBQUUsZUFBZTtJQUMxQixPQUFPLEVBQUUsYUFBYTtDQUN6QixDQUFBO0FBRUQsTUFBTSxDQUFDLE1BQU0sWUFBWSxHQUFHLENBQUMsSUFBUyxFQUFFLEVBQUU7SUFDdEMsT0FBTztRQUNILEdBQUcsSUFBSTtRQUNQLE1BQU0sRUFBRSxRQUFRO1FBQ2hCLE9BQU8sRUFBRSxRQUFRO1FBQ2pCLE9BQU8sRUFBRSxRQUFRO1FBQ2pCLFdBQVcsRUFBRSxRQUFRO0tBQ3hCLENBQUE7QUFDTCxDQUFDLENBQUE7QUFFRCxNQUFNLENBQUMsTUFBTSxLQUFLLEdBQUcsR0FBRyxFQUFFLENBQUMsbUJBQW1CLENBQUM7SUFDM0MsbUJBQW1CLEVBQVM7SUFDNUIsYUFBYSxFQUFTO0NBQ3pCLEVBQUUsNkJBQTZCLENBQUMsQ0FBQSJ9
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiaW5kZXguanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi8uLi9zcmMvbGliL2luZGV4LnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUFBLGNBQWMsWUFBWSxDQUFBO0FBQzFCLGNBQWMsaUJBQWlCLENBQUE7QUFDL0IsT0FBTyxFQUFFLG1CQUFtQixFQUFFLE1BQU0sbUJBQW1CLENBQUE7QUFDdkQsT0FBTyxFQUFFLE9BQU8sSUFBSSxhQUFhLEVBQUUsTUFBTSxTQUFTLENBQUE7QUFDbEQsT0FBTyxFQUFFLE1BQU0sSUFBSSxlQUFlLEVBQUUsTUFBTSxlQUFlLENBQUE7QUFDekQsT0FBTyxFQUFFLFNBQVMsSUFBSSxtQkFBbUIsRUFBRSxhQUFhLEVBQUUsTUFBTSxpQkFBaUIsQ0FBQTtBQUVqRixNQUFNLENBQUMsTUFBTSxlQUFlLEdBQUc7SUFDM0IsU0FBUyxFQUFFLGVBQWU7SUFDMUIsT0FBTyxFQUFFLGFBQWE7Q0FDekIsQ0FBQTtBQUVELE1BQU0sQ0FBQyxNQUFNLFlBQVksR0FBRyxDQUFDLElBQVMsRUFBRSxFQUFFO0lBQ3RDLE9BQU87UUFDSCxHQUFHLElBQUk7UUFDUCxNQUFNLEVBQUUsUUFBUTtRQUNoQixPQUFPLEVBQUUsUUFBUTtRQUNqQixPQUFPLEVBQUUsUUFBUTtRQUNqQixXQUFXLEVBQUUsUUFBUTtLQUN4QixDQUFBO0FBQ0wsQ0FBQyxDQUFBO0FBRUQsTUFBTSxDQUFDLE1BQU0sS0FBSyxHQUFHLEdBQUcsRUFBRSxDQUFDLG1CQUFtQixDQUFDO0lBQzNDLG1CQUFtQixFQUFTO0lBQzVCLGFBQWEsRUFBUztDQUN6QixFQUFFLDZCQUE2QixDQUFDLENBQUEifQ==

View File

@ -1,4 +1,3 @@
import { IGeo } from '@polymech/commons/types';
export interface SearchMetadata {
id: string;
status: string;
@ -26,41 +25,10 @@ export interface OperatingHours {
miércoles: string;
jueves: string;
}
export interface LocalResult {
position: number;
title: string;
place_id: string;
data_id: string;
data_cid: string;
reviews_link: string;
photos_link: string;
gps_coordinates: GpsCoordinates;
place_id_search: string;
provider_id: string;
rating: number;
reviews: number;
type: string;
types: string[];
address: string;
open_state: string;
hours: string;
operating_hours: OperatingHours;
phone: string;
website: string;
email: string;
emails: string[];
thumbnail: string;
meta: any;
links?: string[];
allLinks?: string[];
instagram?: string;
facebook?: string;
youtube?: string;
linkedin?: string;
twitter?: string;
geo?: IGeo;
rejected?: boolean;
}
export type LocalResult = {
[key: string]: any;
filterType?: string;
};
export interface SearchParameters {
engine: string;
type: string;

View File

@ -141,4 +141,5 @@ export interface IScaleserpResponse {
export interface IScaleserpSearch {
api_key: string;
q: string;
blacklist?: string[];
}

View File

@ -6,7 +6,7 @@ export const defaultOptionsSchema = z.object({
cache: z.boolean().default(true),
category: z.string().optional(),
debug: z.boolean().default(false),
dst: z.string().default('${POLYMECH_ROOT}/campaign/maps/${FROM}/${CATEGORY}/${QUERY}-10.xls'),
dst: z.string().default('./test/campaign/maps/${FROM}/${CATEGORY}/${QUERY}-10.xls'),
dump: z.string().optional(),
engine: z.string().default('google'),
env_key: z.string().default('OSR-CONFIG'),

View File

@ -44,7 +44,7 @@ export const defaultOptions = (yargs: CLI.Argv) => {
default: 50
}).option('dst', {
description: 'dst output path, supports XLS|CSV|HTML',
default: '${POLYMECH_ROOT}/campaign/maps/${FROM}/${CATEGORY}/${QUERY}-${MM}.xls'
default: './test/campaign/maps/${FROM}/${CATEGORY}/${QUERY}-${MM}.xls'
}).option('filterCountry', {
description: ' by this country',
}).option('filterCity', {

View File

@ -1,71 +1,24 @@
import * as path from 'path'
import { URL } from 'url'
import * as CLI from 'yargs'
import { CONFIG_DEFAULT, DEFAULT_ROOTS, filesEx, IProfile, pathInfo } from '@polymech/commons'
import { CONFIG_DEFAULT, DEFAULT_ROOTS, IProfile, pathInfo, filesEx } from '@polymech/commons'
import { cleanObjectStrings } from './googlemaps-utils.js'
import {parse as parseProfile } from '@polymech/commons/profile'
import { isFile, resolve, substitute } from '@polymech/commons'
import { toYargs } from '@polymech/commons'
/**
 * Shape describing the components of a URL: a mandatory scheme plus optional
 * host, path, query map and fragment.
 *
 * NOTE(review): none of the functions visible in this file reference this
 * type (parseCustomUrl returns `string | object`) — confirm it is still used.
 */
export type ParsedURL = {
scheme: string
host?: string
path?: string
query?: Record<string, string>
fragment?: string
}
/**
 * Percent-encodes the part of `url` that sits between the `://` scheme
 * separator and the first `?` (or the end of the string when there is no
 * query), leaving the scheme and the query string untouched.
 *
 * Note that the whole span is passed through `encodeURIComponent`, so any `/`
 * inside it is encoded as `%2F` as well.
 *
 * @param url - URL-like string, e.g. `fs://my dir/file.json?x=1`.
 * @returns The URL with that span encoded. Input without a `://` separator is
 *   returned unchanged.
 */
export const escapeFirstUrlSegment = (url: string): string => {
  const separator = '://'
  const separatorIndex = url.indexOf(separator)
  // No scheme separator: there is nothing to escape. Previously `-1 + 3` was
  // used as the split offset here, which corrupted the input from offset 2.
  if (separatorIndex === -1) {
    return url
  }
  const schemeEndIndex = separatorIndex + separator.length
  const restOfUrl = url.slice(schemeEndIndex)
  const questionMarkIndex = restOfUrl.indexOf('?')
  const hasQuery = questionMarkIndex !== -1
  const firstSegment = hasQuery ? restOfUrl.slice(0, questionMarkIndex) : restOfUrl
  const query = hasQuery ? restOfUrl.slice(questionMarkIndex) : ''
  return url.slice(0, schemeEndIndex) + encodeURIComponent(firstSegment) + query
}
/**
 * Scheme handler used for the `fs` and `default` entries of `schemeHandlers`:
 * forwards the given path to `read` and resolves with whatever it returns.
 *
 * @param path - Path handed over by the URL dispatcher.
 */
export const handleFs = async (path: string): Promise<string | object> => read(path)
/**
 * Lookup table from URL scheme (protocol without the trailing `:`) to the
 * async handler that loads the resource. parseCustomUrl indexes this table
 * with the parsed scheme and falls back to the key `default` when the scheme
 * is empty.
 */
export const schemeHandlers: Record<string, (arg1: string, arg2?: URLSearchParams) => Promise<string | object>> = {
// The `osr-ai` handler is currently disabled; parseCustomUrl still contains a
// branch for that scheme, which is unreachable while this entry stays commented out.
// 'osr-ai': handleOsrAi,
'fs': handleFs,
'default': handleFs
}
/**
 * Resolves a "custom URL" argument to its content.
 *
 * 1. When `url` contains no `://` and resolves to an existing file, that file
 *    is read as JSON and returned.
 * 2. Otherwise the string is parsed as a URL and dispatched to the matching
 *    entry of `schemeHandlers` (`osr-ai` receives hostname + search params,
 *    every other scheme receives the pathname).
 * 3. When the string is not a parseable URL, no handler is registered for its
 *    scheme, or the handler yields a falsy result, the original `url` string
 *    is returned as-is.
 *
 * @param url - File path or scheme-prefixed URL.
 * @returns The loaded content, or `url` itself when nothing could be loaded.
 */
export const parseCustomUrl = async (url: string): Promise<string | object> => {
  if (!url.includes('://')) {
    const _path = path.resolve(resolve(url))
    if (exists(_path) && isFile(_path)) {
      return read(_path, 'json')
    }
  }

  let parsedUrl: URL
  try {
    parsedUrl = new URL(escapeFirstUrlSegment(url))
  } catch (e) {
    // `new URL` throws on scheme-less or otherwise invalid input. Such input is
    // treated as a plain literal, matching the `result || url` fallback below
    // instead of crashing the caller.
    return url
  }

  const scheme = parsedUrl.protocol.replace(':', '') || 'default'
  const handler = schemeHandlers[scheme]
  if (!handler) {
    return url
  }

  const result =
    scheme === 'osr-ai'
      ? await handler(parsedUrl.hostname, parsedUrl.searchParams)
      : await handler(parsedUrl.pathname)
  return result || url
}
import {
zodSchema,
zodSchemaEach,
yargsOptions,
yargsOptionsEach,
IOptionsGoogleMaps,
IOptionsGoogleMapsEach,
} from './googlemaps-zod.js'
import { parseCustomUrl, resolvePath } from './googlemaps-utils.js'
export { zodSchema, zodSchemaEach, yargsOptions, yargsOptionsEach, IOptionsGoogleMaps, IOptionsGoogleMapsEach }
import { clone } from '../options.js'
import { z } from 'zod'
import type { GoogleMapsParameters, GoogleParameters } from "serpapi"
import { sync as write } from '@polymech/fs/write'
import { sync as read } from '@polymech/fs/read'
@ -82,7 +35,6 @@ import { findEMail } from './email.js'
import { defaultEngine, defaultFromLocation, defaultGoogleDomain, defaultLanguage, PAGE_SIZE, SEARCH_AI_PROMPTS } from './constants.js'
import { meta } from './html.js'
import { reverse, REVERSE_DEFAULT } from './geo.js'
import { IOptionsGoogleMaps, IOptionsGoogleMapsEach } from './types-googlemaps.js'
import { writeReport } from '../lib/report_map.js'
import { geocode_forward } from './geo.js'
import { LocalResult } from './map_types.js'
@ -92,74 +44,25 @@ import { store as getStore } from '@polymech/registry'
const MODULE_NAME = 'osr-search'
const queryExtras = ''
const blUrls = ['bazar.preciousplastic.com']
export enum SearchQueriesES { INJECTION = "inyección de plastico" }
/**
 * Default origin for map searches, as a comma-separated coordinate pair.
 * defaultSearchParamsMapsES feeds this value into locationString to build the
 * `ll` parameter. (Presumably "latitude,longitude" — verify against the
 * search provider's `ll` format.)
 */
export const home = (): string => {
  return '41.6911354,2.1652746'
}
export enum SearchQueriesES {
INJECTION = 'inyección de plastico',
}
/**
 * Formats a coordinate string and zoom level as `@<coords>,<zoom>z`, the form
 * assigned to `searchCoord` / `ll` elsewhere in this module.
 *
 * @param coords - Comma-separated coordinate pair, inserted verbatim.
 * @param zoom - Zoom level; defaults to 13.
 */
export const locationString = (coords: string, zoom: number = 13) => {
  return '@' + coords + ',' + zoom + 'z'
}
export const store = async (storePath: string, ns: string = 'osr-search') =>
getStore(storePath, ns) as any
export const store = async (storePath: string, ns: string = 'osr-search') => getStore(storePath, ns) as any
export const getStored = async (title:string, storePath: string, ns: string = 'osr-search') =>
export const getStored = async (title: string, storePath: string, ns: string = 'osr-search') =>
getStore(storePath, ns).get(title)
/**
 * Builds the zod object schema for the single-search options; the schema
 * carries the description `IOptionsGoogleMaps`. Every field is either
 * optional or has a default, so an empty object is accepted and filled with
 * the defaults listed below. The same schema is converted into CLI options by
 * yargsOptions.
 *
 * `dst`, `index` and `store` defaults contain `${NAME}` placeholders.
 * NOTE(review): presumably these are expanded later by resolvePath / resolve —
 * confirm against the callers.
 */
export const zodSchema = () => z.object({
api_key: z.string().optional().describe('API Key'),
cache: z.boolean().default(false),
category: z.string().optional().default('category'),
dst: z.string().default('${POLYMECH_ROOT}/campaign/maps/${FROM}/${CATEGORY}/${QUERY}-10.xls'),
dump: z.string().optional(),
engine: z.string().default('google_maps'),
env_key: z.string().default('OSR-CONFIG'),
filterCity: z.string().optional(),
filterCountry: z.string().optional(),
filterType: z.string().optional(),
findEMail: z.boolean().default(false),
geocode_key: z.string().optional(),
google_domain: z.string().default('google.com'),
headless: z.boolean().default(true).describe('Headless mode'),
language: z.string().default('en'),
limit: z.number().default(5),
logLevel: z.string().default('info'),
meta: z.boolean().default(true),
searchCache: z.boolean().default(false).describe('Use search cache'),
query: z.string().default('plastichub'),
searchCoord: z.string().optional(),
searchFrom: z.string().optional().default('barcelona, spain'),
source: z.string().optional(),
type: z.string().optional().default('search'),
zoom: z.number().optional().default(13),
index: z.string().default('${OSR_ROOT}/osr-directory/meta/index.json').describe('Index file'),
store: z.string().default('${OSR_ROOT}/osr-directory/meta/index.db').describe('Index store'),
}, { description: 'IOptionsGoogleMaps' })
/**
 * Builds the zod schema for the additional options of the batch ("each")
 * command. `country`, `area` and `list` have neither `.optional()` nor a
 * default and are therefore required; all other fields are optional or
 * defaulted. zodSchemaEach merges this schema on top of zodSchema.
 */
export const zodSchemaEachExtras = () => z.object({
logLevel: z.string().default('info'),
log: z.string().optional(),
country: z.string().describe('The country to search in, variable ${COUNTRY}'),
area: z.string().describe('The city to search in, variable ${AREA}'),
list: z.string().describe('List of items to process, FILE|GLOB|AI-Query, provided as ${TOWN}'),
cwd: z.string().optional().default('./').describe('the current working directory to use, otherwise . is being assumed'),
env: z.string().default(''),
profile: z.string().default('${OSR_ROOT}/osr-templates/osrl/.osrl.json'),
migrate: z.boolean().default(false),
})
/**
 * Full option schema for the batch ("each") command: the base zodSchema merged
 * with zodSchemaEachExtras and described as `IOptionsGoogleMapsEach`.
 * `logLevel` is declared in both schemas with the same definition.
 */
export const zodSchemaEach = () => zodSchema().merge(zodSchemaEachExtras()).describe('IOptionsGoogleMapsEach')
/** Registers the options of zodSchema on the given yargs instance via toYargs. */
export const yargsOptions = (yargs: CLI.Argv) => toYargs(yargs as any, zodSchema() as any)
/** Registers the options of zodSchemaEach (base + batch extras) on the given yargs instance via toYargs. */
export const yargsOptionsEach = (yargs: CLI.Argv) => toYargs(yargs as any, zodSchemaEach() as any)
export const searchVendor = async (name: string, dst: string, opts: IScaleserpSearch) => {
let q = name;
let q = name
let ret = await SearchProviders.scaleserp({
api_key: opts.api_key,
q: q + queryExtras
q: q + queryExtras,
})
let urls = ret.organic_results.filter((u) => {
return !blUrls.includes(new URL(u.link).hostname)
return !opts.blacklist.includes(new URL(u.link).hostname)
})
urls = urls.map((u) => u.link) as any
@ -178,13 +81,13 @@ export const defaultParamsGoogleES = (query, mixin) => {
}
export const defaultSearchParamsMapsES = (query, zoom, mixin = {}) => {
return {
"engine": defaultEngine,
"type": "search",
"q": query,
"ll": locationString(home(), zoom),
"google_domain": defaultGoogleDomain,
"hl": defaultLanguage,
...mixin
engine: defaultEngine,
type: 'search',
q: query,
ll: locationString('41.6911354,2.1652746', zoom),
google_domain: defaultGoogleDomain,
hl: defaultLanguage,
...mixin,
}
}
export const searchVendorSA = async (
@ -208,8 +111,18 @@ export const searchVendorSA = async (
export const searchGoogleMap = async (
query: string,
key: string,
opts: any
opts: IOptionsGoogleMaps,
) => {
/**
 * Rounds the first two comma-separated components of a coordinate string to
 * `decimals` places and re-emits it as `@<lat>,<lng>,<zoom>`; the third
 * component is passed through untouched.
 *
 * Accepts both the bare `lat,lng,zoom` form and the `@lat,lng,zoomz` form
 * produced by locationString. Without stripping the leading `@`, the first
 * component parsed to NaN and the result became `@NaN,...`.
 *
 * @param coords - Coordinate string, optionally prefixed with `@`.
 * @param decimals - Number of decimal places to keep; defaults to 3.
 */
const roundCoords = (coords: string, decimals: number = 3): string => {
  const [latitude, longitude, zoom] = coords
    .replace(/^@/, '')
    .split(',')
    .map((part, index) => {
      if (index < 2) {
        // Round via toFixed, then parse back so trailing zeros are dropped.
        return parseFloat(parseFloat(part).toFixed(decimals))
      }
      return part
    })
  return `@${latitude},${longitude},${zoom}`
}
const googleParams = {
...opts,
api_key: key,
@ -217,9 +130,9 @@ export const searchGoogleMap = async (
ll: opts.searchCoord
} as GoogleMapsParameters
let results = []
let results: LocalResult[] = []
let pageIdx = 0
let index = opts.index ? read(opts.index, 'json') as any || {} : {}
let index = opts.index ? (read(opts.index, 'json') as any) || {} : {}
const params: any = googleParams
let cached: null
const cache_key = {
@ -234,15 +147,6 @@ export const searchGoogleMap = async (
}
if (opts.searchCache && OSR_CACHE()) {
const roundCoords = (coords: string, decimals: number = 3): string => {
const [latitude, longitude, zoom] = coords.split(',').map((part, index) => {
if (index < 2) {
return parseFloat(parseFloat(part).toFixed(decimals))
}
return part
})
return `@${latitude},${longitude},${zoom}`;
}
cached = await get_cached_object(cache_key, MODULE_NAME)
}
@ -255,7 +159,7 @@ export const searchGoogleMap = async (
page.local_results.forEach((r) => {
r.page = pageIdx
})
results.push(...page.local_results);
results.push(...page.local_results)
if (results.length >= opts.limit) break
pageIdx++
page = await page.next?.()
@ -269,7 +173,7 @@ export const searchGoogleMap = async (
page.place_results.forEach((r) => {
r.page = pageIdx
})
results.push(...page.place_results);
results.push(...page.place_results)
if (results.length >= opts.limit) break
pageIdx++
page = await page.next?.()
@ -278,26 +182,15 @@ export const searchGoogleMap = async (
if (opts.searchCache && OSR_CACHE()) {
set_cached_object(cache_key, MODULE_NAME, results)
}
let idx = 0
//const cachedLoc = async (title: string) => getStored(title, opts.store, MODULE_NAME)
await pMap(results, async (entry: any) => {
idx++
entry.position = entry.page * PAGE_SIZE + idx
try {
if (index[entry.title] && index[entry.title].geo) {
entry.geo = index[entry.title].geo
return
}
return reverse(entry, opts)
} catch (e) {
logger.error(`Error reverse geocoding ${entry.title}`)
entry.geo = REVERSE_DEFAULT
}
}, { concurrency: 3 })
await enrichResults(results, index, opts)
logger.debug(`search ${query} with ${params.ll} / ${params.searchFrom} @ ${opts.zoom} : ${results.length} items`)
logger.debug(
`search ${query} with ${params.ll} / ${params.searchFrom} @ ${opts.zoom} | ${results.length} results before filters`,
)
if (opts.filterCity) {
results = results.filter((r) => r.geo.city.toLowerCase() === opts.filterCity.toLowerCase())
@ -314,49 +207,85 @@ export const searchGoogleMap = async (
results = results.filter((r) => r.gps_coordinates)
const beforeCached = results.length
results = results.filter((r) => {
const newResults = results.filter((r) => {
return index[r.title] == null || !index[r.title].geo || !index[r.title].meta
})
logger.info(`search ${query} with ${params.ll} / ${params.searchFrom} : ${results.length} items | ${beforeCached} before cache`)
results = results.slice(0, opts.limit)
if (opts.meta) {
await pMap(results, (entry: any) => {
if (entry.meta || !entry.website || entry.rejected) {
return
}
logger.info(
`found ${newResults.length} new items for "${query}" from "${params.searchFrom}" | ${beforeCached} total before cache filtering`,
)
const processedResults = newResults.slice(0, opts.limit)
await enrichResults(processedResults, index, opts)
return results
}
const enrichResults = async (results: LocalResult[], index: any, opts: IOptionsGoogleMaps) => {
let idx = 0
await pMap(
results,
async (entry: any) => {
idx++
entry.position = entry.page * PAGE_SIZE + idx
try {
if (index[entry.title] && index[entry.title].meta) {
entry.meta = index[entry.title].meta
if (index[entry.title] && index[entry.title].geo) {
entry.geo = index[entry.title].geo
return
}
return meta(entry, opts)
return reverse(entry, opts)
} catch (e) {
// entry.meta = {}
logger.error(`Error reverse geocoding ${entry.title}`)
entry.geo = REVERSE_DEFAULT
}
}, { concurrency: 1 })
},
{ concurrency: opts.concurrency },
)
if (opts.meta) {
await pMap(
results,
(entry: any) => {
if (entry.meta || !entry.website || entry.rejected) {
return
}
try {
if (index[entry.title] && index[entry.title].meta) {
entry.meta = index[entry.title].meta
return
}
return meta(entry, opts)
} catch (e) {
// entry.meta = {}
}
},
{ concurrency: 1 },
)
}
if (opts.findEMail && opts.meta) {
const emails = await pMap(results, async (entry: any) => {
if (index[entry.title] && index[entry.title].email) {
entry.email = index[entry.title].email
return
}
if (entry.meta && entry.website && !entry.email) {
try {
//logger.debug(`searching email for ${entry.website}`)
return findEMail(SEARCH_AI_PROMPTS.GET_EMAIL, entry.website, opts, entry)
} catch (e) {
logger.error(`Error retrieving EMail data ${entry.title}`)
await pMap(
results,
async (entry: any) => {
if (index[entry.title] && index[entry.title].email) {
entry.email = index[entry.title].email
return
}
}
}, { concurrency: 1 })
if (entry.meta && entry.website && !entry.email) {
try {
logger.debug(`searching email for ${entry.website}`)
return findEMail(SEARCH_AI_PROMPTS.GET_EMAIL, entry.website, opts, entry)
} catch (e) {
logger.error(`Error retrieving EMail data ${entry.title}`)
}
}
},
{ concurrency: 1 },
)
}
return results
}
export const parse = (argv: any): any => {
const args: any = argv
logger.settings.minLevel = args.logLevel as any || 2
export const parse = (argv: IOptionsGoogleMaps): IOptionsGoogleMaps => {
const args: IOptionsGoogleMaps = argv
logger.settings.minLevel = (args.logLevel as any) || 2
const config = CONFIG_DEFAULT(args.env_key) as any
if (!config) {
logger.warn('No config found!')
@ -411,20 +340,7 @@ export const parse = (argv: any): any => {
}
return opts
}
/**
 * Expands a destination template into an absolute path.
 *
 * Builds the variable map (QUERY, FROM, ENGINE, DOMAIN, LANG, COUNTRY, AREA,
 * CATEGORY, plus anything in `opts.variables`, which wins on key clashes),
 * passes it together with `str` to `resolve`, and makes the result absolute
 * with `path.resolve`.
 *
 * FROM is `opts.searchFrom` split on commas, trimmed and re-joined with `/`
 * (e.g. "barcelona, spain" becomes "barcelona/spain"). When `searchFrom` is
 * empty the literal fallback 'barcelona, spain' is used without that
 * normalisation.
 * NOTE(review): that fallback looks inconsistent with the normalised form —
 * confirm whether it is intentional.
 *
 * @param str - Path template; presumably contains `${NAME}` placeholders that
 *   `resolve` substitutes — verify against `resolve`.
 * @param query - Value for QUERY.
 * @param category - Value for CATEGORY; falsy values become 'unknown'.
 * @param opts - Search options supplying the remaining variables.
 */
export const resolvePath = (str: string, query, category, opts: any) => {
  let from = 'barcelona, spain'
  if (opts.searchFrom) {
    from = opts.searchFrom
      .split(',')
      .map((part) => part.trim())
      .join('/')
  }

  const variables = {
    QUERY: query,
    FROM: from,
    ENGINE: opts.engine,
    DOMAIN: opts.google_domain,
    LANG: opts.language,
    COUNTRY: opts.country,
    AREA: opts.area,
    CATEGORY: category || 'unknown',
    // Caller-supplied variables are spread last so they override the built-ins.
    ...(opts.variables || {}),
  }

  const expanded = resolve(str, false, variables)
  return path.resolve(expanded)
}
export const googleMaps = async (opts: IOptionsGoogleMaps) => {
opts = parse(opts)
if (!opts) {
@ -438,17 +354,18 @@ export const googleMaps = async (opts: IOptionsGoogleMaps) => {
if (coords) {
opts.searchCoord = locationString(coords, opts.zoom)
} else {
logger.error('Error geocoding', searchFrom)
logger.error(`Error geocoding "${searchFrom}"`)
}
}
} catch (error) {
logger.error('Error geocoding', error, error.stack)
logger.error(`Error geocoding "${opts.searchFrom}"`, error, error.stack)
}
let ret: any[] = []
const search = async (query: string, category, opts: any) => {
opts = clone(opts)
opts.dst = resolvePath(path.join(opts.cwd || '', opts.dst || ''), query, category, opts)
logger.debug(`output destination --dst "${opts.dst}"`)
if (opts.cache !== false && exists(opts.dst + '.json')) {
const cachedPath = opts.dst + '.json'
const cached = read(cachedPath, 'json') as any || []
@ -486,16 +403,42 @@ export const googleMaps = async (opts: IOptionsGoogleMaps) => {
if (opts.dst) {
opts.dst = resolvePath(opts.dst, 'all', 'all', opts)
logger.debug(`final output destination --dst "${opts.dst}"`)
let existingResults: LocalResult[] = []
if (exists(opts.dst + '.json')) {
const last = (read(opts.dst + '.json', 'json') as any || [])
ret = [...last, ...ret]
existingResults = (read(opts.dst + '.json', 'json') as LocalResult[]) || []
}
write(opts.dst + '.json', ret)
writeReport(ret, opts.dst, opts)
// Combine, deduplicate, clean, and process URLs in a single chain
const finalResults = Array.from(
[...existingResults, ...ret].reduce((map, obj) => {
if (obj.place_id) {
map.set(obj.place_id, obj)
}
return map
}, new Map<string, LocalResult>()).values(),
)
.map(cleanObjectStrings)
.map((r: any) => {
if (r.website && typeof r.website === 'string' && r.website.startsWith('/url?q=')) {
try {
const urlString = r.website.substring('/url?q='.length)
const decodedUrl = decodeURIComponent(urlString)
const urlParts = decodedUrl.split('&')
r.website = urlParts[0]
} catch (e) {
logger.warn(`Could not parse website URL: ${r.website}`)
}
}
return r
})
write(opts.dst + '.json', finalResults)
writeReport(finalResults, opts.dst, opts)
}
if (opts.index) {
let index = read(opts.index, 'json') as any || {}
let index = (read(opts.index, 'json') as any) || {}
ret.forEach((r) => {
if (!index[r.title]) {
index[r.title] = r
@ -528,8 +471,12 @@ export const migrate = async (opts: IOptionsGoogleMapsEach) => {
return ret
}
export const each = async (opts: IOptionsGoogleMapsEach) => {
logger.settings.minLevel = opts.logLevel as any || 2
logger.settings.minLevel = (opts.logLevel as any) || 2
let items: string[] = []
if (!opts.list) {
logger.error('No list provided for each command')
return
}
let listPath = path.resolve(resolve(opts.list))
const profile: IProfile = parseProfile(opts.profile,
@ -565,22 +512,26 @@ export const each = async (opts: IOptionsGoogleMapsEach) => {
items = items.filter((item) => !!item)
logger.debug(`${items.length} items`)
write(path.join(path.resolve(resolve(opts.cwd), 'list.json')), items)
const all: any[] = await pMap(items, (KEY) => {
const variables = {
KEY,
TOWN: KEY,
...profile.variables
}
const googleOpts = {
...opts,
query: substitute(false, opts.query, variables),
dst: substitute(false, opts.dst, variables),
searchFrom: substitute(false, opts.searchFrom, variables),
variables
}
const ret = googleMaps(googleOpts)
return ret
}, { concurrency: 1 })
const all: any[] = await pMap(
items,
(KEY) => {
const variables = {
KEY,
TOWN: KEY,
...profile.variables
}
const googleOpts: IOptionsGoogleMaps = {
...opts,
query: substitute(false, opts.query, variables),
dst: substitute(false, opts.dst, variables),
searchFrom: substitute(false, opts.searchFrom, variables),
variables
}
const ret = googleMaps(googleOpts)
return ret
},
{ concurrency: 1 },
)
opts.log && write(path.resolve(resolve(opts.log)), all)
return all
}

View File

@ -1,6 +1,5 @@
export * from './types.js'
export * from './googlemaps.js'
export * from './types-googlemaps.js'
import { generate_interfaces } from '@polymech/commons'
import { getJson as searchSerpAPI } from "serpapi"
import { search as searchScaleserp } from './scalesep.js'

View File

@ -31,40 +31,9 @@ export interface OperatingHours {
jueves: string;
}
export interface LocalResult {
position: number;
title: string;
place_id: string;
data_id: string;
data_cid: string;
reviews_link: string;
photos_link: string;
gps_coordinates: GpsCoordinates;
place_id_search: string;
provider_id: string;
rating: number;
reviews: number;
type: string;
types: string[];
address: string;
open_state: string;
hours: string;
operating_hours: OperatingHours;
phone: string;
website: string;
email: string;
emails: string[];
thumbnail: string;
meta:any;
links?: string[];
allLinks?: string[];
instagram?: string;
facebook?: string;
youtube?: string;
linkedin?: string;
twitter?: string;
geo?: IGeo;
rejected?: boolean;
export type LocalResult = {
[key: string]: any
filterType?: string
}
export interface SearchParameters {

View File

@ -1,78 +0,0 @@
/**
 * Options for a single Google Maps search; this is the parameter type of
 * googleMaps(), parse() and searchGoogleMap(). Every field is optional.
 *
 * NOTE(review): the field list mirrors the zod schema described as
 * `IOptionsGoogleMaps`, so this file looks generated from it — confirm before
 * editing by hand.
 */
export interface IOptionsGoogleMaps {
/** API Key */
api_key?: string | undefined;
cache?: boolean;
category?: string;
dst?: string;
dump?: string | undefined;
engine?: string;
env_key?: string;
filterCity?: string | undefined;
filterCountry?: string | undefined;
filterType?: string | undefined;
findEMail?: boolean;
geocode_key?: string | undefined;
google_domain?: string;
/** Headless mode */
headless?: boolean;
language?: string;
limit?: number;
logLevel?: string;
meta?: boolean;
searchCache?: boolean;
query?: string;
searchCoord?: string | undefined;
searchFrom?: string | undefined;
source?: string | undefined;
type?: string;
zoom?: number;
/** Index file */
index?: string;
/** Index store */
store?: string;
}
/**
 * Options for the batch commands each() and migrate(): repeats every field of
 * IOptionsGoogleMaps and adds the batch-specific ones (`log`, `country`,
 * `area`, `list`, `cwd`, `env`, `profile`, `migrate`). Only `country`, `area`
 * and `list` are required.
 *
 * NOTE(review): the shared fields are duplicated rather than inherited, so
 * they can drift from IOptionsGoogleMaps if either interface is edited by hand.
 */
export interface IOptionsGoogleMapsEach {
/** API Key */
api_key?: string | undefined;
cache?: boolean;
category?: string;
dst?: string;
dump?: string | undefined;
engine?: string;
env_key?: string;
filterCity?: string | undefined;
filterCountry?: string | undefined;
filterType?: string | undefined;
findEMail?: boolean;
geocode_key?: string | undefined;
google_domain?: string;
/** Headless mode */
headless?: boolean;
language?: string;
limit?: number;
logLevel?: string;
meta?: boolean;
searchCache?: boolean;
query?: string;
searchCoord?: string | undefined;
searchFrom?: string | undefined;
source?: string | undefined;
type?: string;
zoom?: number;
/** Index file */
index?: string;
/** Index store */
store?: string;
log?: string | undefined;
/** The country to search in, variable ${COUNTRY} */
country: string;
/** The city to search in, variable ${AREA} */
area: string;
/** List of items to process, FILE|GLOB|AI-Query, provided as ${TOWN} */
list: string;
/** the current working directory to use, otherwise . is being assumed */
cwd?: string;
env?: string;
profile?: string;
migrate?: boolean;
}

View File

@ -163,6 +163,7 @@ export interface IScaleserpResponse {
}
export interface IScaleserpSearch {
api_key:string
q:string
api_key: string
q: string
blacklist?: string[]
}