This commit is contained in:
lovebird 2026-03-02 08:56:39 +01:00
parent ada3b29a40
commit 49c1a607d3
21 changed files with 2274 additions and 1509 deletions

269
README.md
View File

@ -1,170 +1,119 @@
# Deepmark
# @polymech/deepl-mark
Translate markdown files correctly with `mdast` and DeepL.
Translate markdown and MDX content using [DeepL](https://www.deepl.com/), powered by `mdast`.
## Getting Started
Correctly handles headings, paragraphs, lists, tables (GFM), links, JSX components, frontmatter, and inline formatting — preserving structure while translating only the text.
1. Install `deepmark`:
## Install
```bash
# NPM
npm install -D deepmark
# PNPM
pnpm add -D deepmark
# Yarn
yarn add -D deepmark
npm install @polymech/deepl-mark
```
2. Create a `deepmark.config.mjs` on your project root:
```js
// deepmark.config.mjs
/** @type {import("deepmark").UserConfig} */
export default {
sourceLanguage: 'en',
outputLanguages: ['zh', 'ja'],
directories: [
['i18n/$langcode$', 'i18n/$langcode$'],
['docs', 'i18n/$langcode$/docs'],
['blog', 'i18n/$langcode$/blog']
]
};
```
3. Set `DEEPL_AUTH_KEY` environment variable containing DeepL developer auth key:
```bash
# If you're on Linux
export DEEPL_AUTH_KEY=your_auth_key
```
You can also use something like the `dotenv` package. For CI and remote environments such as GitHub Actions and Gitpod, look into their settings pages to set environment variables.
4. Set NPM scripts in your `package.json`:
```json
{
"scripts": {
"translate": "deepmark translate"
}
}
```
```bash
# NPM
npm run translate
# PNPM
pnpm run translate
# Yarn
yarn translate
```
Notes:
> `deepmark` also supports translating JSON and YAML files.
## Limitations
1. It is not possible to add `mdast` plugins to the workflow.
2. Only `.md` and `.mdx` are supported. Other extended versions of markdown, such as Svelte's `mdsvex`, are not supported.
## Documentation
#### Translation modes:
```bash
deepmark translate --mode hybrid|offline|online
```
- `hybrid` (default): Look for translation from local translation memory first before using DeepL API.
- `offline`: Translate using the local translation memory only, passthrough if not found. This mode will not update the translation memory.
- `online`: Translate using DeepL API only. Will overwrite existing translation memory.
#### Configuration:
## Usage
```ts
interface UserConfig {
/**
* Source's language code. Based on DeepL supported languages.
*/
sourceLanguage: SourceLanguageCode;
/**
* Output's languages code. Based on DeepL supported languages.
*/
outputLanguages: TargetLanguageCode[];
/**
* Source and output directory pairs. $langcode$ variable
* is provided to dynamically define directory.
*
* e.g. [ ["docs", "i18n/$langcode$/docs"], ["blog", "i18n/$langcode$/blog"] ]
*/
directories: [string, string][];
/**
* Override current working directory, defaults to `process.cwd()`.
*/
cwd?: string;
/**
* By default, all .md, .mdx, .json, and .yaml|.yml files inside
* source directories will be included.
*
* Define glob patterns to filter what files to include or exclude.
* But, the end result is still restricted by file types (.md, .mdx, .json).
*/
files?: {
include?: string[];
exclude?: string[];
};
/**
* Frontmatter fields.
*/
frontmatterFields?: {
include?: string[];
exclude?: string[];
};
/**
* Markdown node types to include or exclude based on MDAST. Defaults to exclude `code` and `link`.
*/
markdownNodes?: {
default?: boolean;
include?: MdNodeType[];
exclude?: MdNodeType[];
};
/**
* HTML elements to include and exclude, down to the level of attributes
* and children. Include all HTML elements text content
* and some global attributes such as title and placeholder.
*/
htmlElements?: {
default?: boolean;
include?: Partial<{ [Tag in HtmlTag]: { children: boolean; attributes: string[] } }>;
exclude?: HtmlTag[];
};
/**
* JSX components to include and exclude, down to the level of attributes
* and children. Include all JSX components text children
* and exclude all attributes by default.
*
* Support array, object, and jsx attribute value. For object and array value,
* you can specify the access path starting with the attribute name
* e.g. `items.description` to translate `items={[{description: "..."}]}`.
*/
jsxComponents?: {
default?: boolean;
include?: { [Name: string]: { children: boolean; attributes: string[] } };
exclude?: string[];
};
/**
* JSON or YAML file properties to include and exclude.
* Exclude all properties by default.
*/
jsonOrYamlProperties?: {
include?: string[];
exclude?: string[];
};
}
import { translate } from '@polymech/deepl-mark';
const markdown = '# Hello World\n\nThis is a paragraph.';
const result = await translate(markdown, 'en', 'de');
console.log(result);
// # Hallo Welt
//
// Dies ist ein Absatz.
```
### Authentication
Provide your DeepL API key via **options** or **environment variable**:
```ts
// Option 1: pass directly
await translate(md, 'en', 'de', { apiKey: 'your-deepl-key' });
// Option 2: environment variable
// Set DEEPL_AUTH_KEY=your-deepl-key
await translate(md, 'en', 'de');
```
### Options
The optional 4th argument accepts a `TranslateOptions` object:
```ts
await translate(content, 'en', 'de', {
// DeepL API key (falls back to DEEPL_AUTH_KEY env var)
apiKey: '...',
// DeepL translation options (tagHandling, splitSentences, formality, glossaryId, etc.)
deeplOptions: {
formality: 'more',
glossaryId: '...',
},
// Frontmatter fields to include/exclude
frontmatterFields: {
include: ['title', 'description'],
exclude: ['slug'],
},
// Markdown node types to include/exclude (defaults: exclude 'code')
markdownNodes: {
exclude: ['code'],
},
// HTML elements to include/exclude
htmlElements: {
exclude: ['pre', 'code'],
},
// JSX components to include/exclude (with attribute-level control)
jsxComponents: {
include: {
Card: { children: true, attributes: ['header'] },
},
},
});
```
#### DeepL defaults
The following DeepL options are applied by default and can be overridden via `deeplOptions`:
| Option | Default |
|------------------|----------------|
| `tagHandling` | `'html'` |
| `splitSentences` | `'nonewlines'` |
### Supported content
- **Markdown** (`.md`) — headings, paragraphs, lists, blockquotes, tables (GFM), links, images
- **MDX** (`.mdx`) — JSX components and expressions
- **Frontmatter** — YAML frontmatter fields
- **HTML** — inline HTML elements and attributes
## API
### `translate(content, sourceLang, targetLang, options?)`
| Parameter | Type | Description |
|--------------|------------------------|-----------------------------------------------|
| `content` | `string` | Markdown or MDX string to translate |
| `sourceLang` | `SourceLanguageCode` | Source language (e.g. `'en'`, `'de'`, `'fr'`) |
| `targetLang` | `TargetLanguageCode` | Target language (e.g. `'de'`, `'en-US'`) |
| `options` | `TranslateOptions` | Optional config (see above) |
Returns `Promise<string>` — the translated markdown.
## Scripts
```bash
npm test # run all tests
npm run test:tables # run table translation e2e test
npm run build # build for distribution
```
## License
MIT

12
dist/ast/estree.js vendored
View File

@ -2,18 +2,14 @@ function esNodeIs(node, type) {
return node ? node.type === type : false;
}
function resolveEstreePropertyPath(node, parents, attributeName) {
if (!esNodeIs(parents[2], "ArrayExpression") && !esNodeIs(parents[2], "ObjectExpression"))
return;
if (!esNodeIs(node.key, "Identifier"))
return;
if (!esNodeIs(parents[2], "ArrayExpression") && !esNodeIs(parents[2], "ObjectExpression")) return;
if (!esNodeIs(node.key, "Identifier")) return;
const names = [node.key.name];
for (let i = parents.length - 1; i > 1; i--) {
const parent = parents[i];
if (esNodeIs(parent, "ArrayExpression") || esNodeIs(parent, "ObjectExpression"))
continue;
if (esNodeIs(parent, "ArrayExpression") || esNodeIs(parent, "ObjectExpression")) continue;
if (esNodeIs(parent, "Property")) {
if (!esNodeIs(parent.key, "Identifier"))
return;
if (!esNodeIs(parent.key, "Identifier")) return;
names.push(parent.key.name);
continue;
}

6
dist/ast/eswalk.js vendored
View File

@ -53,8 +53,7 @@ const DEFAULT_ESWALKERS = {
};
function eswalk(ast, visitors, walkers = DEFAULT_ESWALKERS) {
const process = (node, parents) => {
if (!node)
return;
if (!node) return;
let type = node.type;
if (esNodeIs(node, "Literal")) {
type = typeof node.value === "bigint" ? "BigIntLiteral" : isRegExp(node.value) ? "RegExpLiteral" : "SimpleLiteral";
@ -66,8 +65,7 @@ function eswalk(ast, visitors, walkers = DEFAULT_ESWALKERS) {
const signal = visit(node, parents);
keepWalking = signal === false ? false : true;
}
if (keepWalking && walk)
walk(node, parents, process);
if (keepWalking && walk) walk(node, parents, process);
};
process(ast, []);
}

8
dist/ast/mdast.js vendored
View File

@ -1,9 +1,11 @@
import { fromMarkdown } from "mdast-util-from-markdown";
import { frontmatterFromMarkdown, frontmatterToMarkdown } from "mdast-util-frontmatter";
import { gfmTableFromMarkdown, gfmTableToMarkdown } from "mdast-util-gfm-table";
import { htmlCommentFromMarkdown, htmlCommentToMarkdown } from "../vendor/mdast-util-html-comment.js";
import { mdxFromMarkdown, mdxToMarkdown } from "mdast-util-mdx";
import { toMarkdown } from "mdast-util-to-markdown";
import { frontmatter } from "micromark-extension-frontmatter";
import { gfmTable } from "micromark-extension-gfm-table";
import { htmlComment } from "../vendor/micromark-extension-html-comment.js";
import { mdxjs } from "micromark-extension-mdxjs";
function mdNodeIs(node, type) {
@ -14,13 +16,13 @@ function mdNodeIsJsxElement(node) {
}
function getMdast(markdown) {
return fromMarkdown(markdown, {
extensions: [frontmatter("yaml"), mdxjs(), htmlComment()],
mdastExtensions: [frontmatterFromMarkdown("yaml"), mdxFromMarkdown(), htmlCommentFromMarkdown()]
extensions: [frontmatter("yaml"), mdxjs(), gfmTable, htmlComment()],
mdastExtensions: [frontmatterFromMarkdown("yaml"), mdxFromMarkdown(), gfmTableFromMarkdown, htmlCommentFromMarkdown()]
});
}
function getMarkdown(mdast) {
return toMarkdown(mdast, {
extensions: [frontmatterToMarkdown("yaml"), mdxToMarkdown(), htmlCommentToMarkdown()],
extensions: [frontmatterToMarkdown("yaml"), mdxToMarkdown(), gfmTableToMarkdown(), htmlCommentToMarkdown()],
join: [
(__, _, parent) => {
if (mdNodeIsJsxElement(parent)) {

9
dist/ast/unwalk.js vendored
View File

@ -4,19 +4,16 @@ const STOP = false;
function unwalk(node, visit, filter) {
let next = true;
function step(node2, parent, index) {
if (filter && !filter(node2, parent))
return;
if (filter && !filter(node2, parent)) return;
if (unNodeIsParent(node2)) {
for (let i = 0; i < node2.children.length; i++) {
if (!next)
break;
if (!next) break;
const child = node2.children[i];
step(child, node2, i);
}
node2.children = node2.children.filter((child) => child);
}
if (!next)
return;
if (!next) return;
const signal = visit(node2, parent, index);
next = signal === void 0 || NEXT ? NEXT : STOP;
}

48
dist/extract.js vendored
View File

@ -34,8 +34,7 @@ function extractMdastStrings({
if (mdNodeIsJsxElement(node) && node.name) {
if (isHtmlTag(node.name)) {
for (const attribute of node.attributes) {
if (!mdNodeIs(attribute, "mdxJsxAttribute"))
continue;
if (!mdNodeIs(attribute, "mdxJsxAttribute")) continue;
if (!isHtmlElementAttributeIncluded({ tag: node.name, attribute: attribute.name, config }))
continue;
if (isString(attribute.value)) {
@ -52,8 +51,7 @@ function extractMdastStrings({
}
} else {
for (const attribute of node.attributes) {
if (!mdNodeIs(attribute, "mdxJsxAttribute"))
continue;
if (!mdNodeIs(attribute, "mdxJsxAttribute")) continue;
const componentName = node.name;
const isAttributeIncluded = isJsxComponentAttributeIncluded({
name: componentName,
@ -61,8 +59,7 @@ function extractMdastStrings({
config
});
if (isString(attribute.value)) {
if (!isAttributeIncluded)
continue;
if (!isAttributeIncluded) continue;
strings.push(attribute.value.trim());
} else if (attribute.value?.data?.estree) {
if (!config.jsxComponents.include[componentName] || !config.jsxComponents.include[componentName].attributes.some(
@ -74,8 +71,7 @@ function extractMdastStrings({
SimpleLiteral(esnode, _2) {
if (isString(esnode.value))
pushTidyString({ array: strings, string: esnode.value });
if (esnode.value === "aye")
console.log("passed");
if (esnode.value === "aye") console.log("passed");
},
JSXElement(esnode, _2) {
const name = esnode.openingElement.name.name;
@ -101,8 +97,7 @@ function extractMdastStrings({
pushTidyString({ array: strings, string: esnode.value });
},
Property(esnode, parents) {
if (!esNodeIs(esnode, "Identifier"))
return false;
if (!esNodeIs(esnode, "Identifier")) return false;
const propertyPath = resolveEstreePropertyPath(esnode, parents, attribute.name);
if (!propertyPath || !isJsxComponentAttributeIncluded({
name: componentName,
@ -117,14 +112,11 @@ function extractMdastStrings({
}
}
if (mdNodeIs(node, "yaml")) {
if (isEmptyArray(config.frontmatterFields.include))
return;
if (isEmptyString(node.value))
return;
if (isEmptyArray(config.frontmatterFields.include)) return;
if (isEmptyString(node.value)) return;
const object = parseYaml(node.value);
for (const field in object) {
if (!isFrontmatterFieldIncluded({ field, config }))
continue;
if (!isFrontmatterFieldIncluded({ field, config })) continue;
const value = object[field];
if (isString(value)) {
strings.push(value);
@ -132,8 +124,7 @@ function extractMdastStrings({
}
if (isArray(value)) {
for (const item of value) {
if (!isString(item))
continue;
if (!isString(item)) continue;
strings.push(item);
}
}
@ -142,25 +133,20 @@ function extractMdastStrings({
}
},
(node, parent) => {
if (!isMarkdownNodeIncluded({ type: node.type, config }))
return false;
if (!isMarkdownNodeIncluded({ type: node.type, config })) return false;
if (parent && mdNodeIsJsxElement(parent) && parent.name) {
if (isHtmlTag(parent.name)) {
if (!isHtmlElementChildrenIncluded({ tag: parent.name, config }))
return false;
if (!isHtmlElementChildrenIncluded({ tag: parent.name, config })) return false;
} else {
if (!isJsxComponentChildrenIncluded({ name: parent.name, config }))
return false;
if (!isJsxComponentChildrenIncluded({ name: parent.name, config })) return false;
}
return true;
}
if (mdNodeIsJsxElement(node) && node.name) {
if (isHtmlTag(node.name)) {
if (!isHtmlElementIncluded({ tag: node.name, config }))
return false;
if (!isHtmlElementIncluded({ tag: node.name, config })) return false;
} else {
if (!isJsxComponentIncluded({ name: node.name, config }))
return false;
if (!isJsxComponentIncluded({ name: node.name, config })) return false;
}
return true;
}
@ -175,14 +161,12 @@ function extractJsonOrYamlStrings({
config
}) {
const strings = [];
if (isEmptyArray(config.jsonOrYamlProperties.include))
return strings;
if (isEmptyArray(config.jsonOrYamlProperties.include)) return strings;
const parsed = type === "json" ? JSON.parse(source) : parseYaml(source);
process(parsed);
function process(value, property) {
if (typeof value === "string") {
if (property && isJsonOrYamlPropertyIncluded({ property, config }))
strings.push(value);
if (property && isJsonOrYamlPropertyIncluded({ property, config })) strings.push(value);
return;
}
if (isArray(value)) {

2
dist/format.js vendored
View File

@ -3,7 +3,7 @@ import { getMarkdown, getMdast, mdNodeIs } from "./ast/mdast.js";
import { unwalk } from "./ast/unwalk.js";
async function format(markdown) {
const mdast = getMdast(
prettier.format(markdown, {
await prettier.format(markdown, {
parser: "mdx",
printWidth: Infinity,
proseWrap: "never",

17
dist/index.d.ts vendored
View File

@ -1,4 +1,14 @@
import type { SourceLanguageCode, TargetLanguageCode } from 'deepl-node';
import type { SourceLanguageCode, TargetLanguageCode, TranslateTextOptions } from 'deepl-node';
import type { UserConfig } from './config.js';
/**
* Options to control which parts of the markdown are translated.
*/
export type TranslateOptions = Omit<UserConfig, 'sourceLanguage' | 'outputLanguages' | 'directories'> & {
/** DeepL API key. Falls back to `DEEPL_AUTH_KEY` env var if not provided. */
apiKey?: string;
/** DeepL translation options (tagHandling, splitSentences, formality, glossaryId, etc.) */
deeplOptions?: TranslateTextOptions;
};
/**
* Translate markdown/MDX content from one language to another using DeepL.
*
@ -7,6 +17,7 @@ import type { SourceLanguageCode, TargetLanguageCode } from 'deepl-node';
* @param content - Markdown or MDX string to translate
* @param sourceLang - Source language code (e.g. 'en', 'de', 'fr')
* @param targetLang - Target language code (e.g. 'de', 'en-US', 'fr')
* @param options - Optional config to control extraction (frontmatter, jsx, html, etc.)
* @returns Translated markdown string
*
* @example
@ -17,5 +28,5 @@ import type { SourceLanguageCode, TargetLanguageCode } from 'deepl-node';
* console.log(result); // '# Hallo Welt'
* ```
*/
export declare function translate(content: string, sourceLang: SourceLanguageCode, targetLang: TargetLanguageCode): Promise<string>;
export type { SourceLanguageCode, TargetLanguageCode } from 'deepl-node';
export declare function translate(content: string, sourceLang: SourceLanguageCode, targetLang: TargetLanguageCode, options?: TranslateOptions): Promise<string>;
export type { SourceLanguageCode, TargetLanguageCode, TranslateTextOptions } from 'deepl-node';

11
dist/index.js vendored
View File

@ -4,18 +4,19 @@ import { extractMdastStrings } from "./extract.js";
import { format } from "./format.js";
import { replaceMdastStrings } from "./replace.js";
import { translateStrings } from "./translate.js";
async function translate(content, sourceLang, targetLang) {
async function translate(content, sourceLang, targetLang, options) {
const { apiKey, deeplOptions, ...configOptions } = options ?? {};
const config = resolveConfig({
sourceLanguage: sourceLang,
outputLanguages: [targetLang],
directories: [["", ""]]
directories: [["", ""]],
...configOptions
});
const formatted = await format(content);
const mdast = getMdast(formatted);
const strings = extractMdastStrings({ mdast, config });
if (strings.length === 0)
return content;
const translated = await translateStrings(strings, sourceLang, targetLang);
if (strings.length === 0) return content;
const translated = await translateStrings(strings, sourceLang, targetLang, apiKey, deeplOptions);
const result = replaceMdastStrings({ mdast, strings: translated, config });
return getMarkdown(result);
}

51
dist/replace.js vendored
View File

@ -35,8 +35,7 @@ function replaceMdastStrings({
if (mdNodeIsJsxElement(node) && node.name) {
if (isHtmlTag(node.name)) {
for (const attribute of node.attributes) {
if (!mdNodeIs(attribute, "mdxJsxAttribute"))
continue;
if (!mdNodeIs(attribute, "mdxJsxAttribute")) continue;
if (!isHtmlElementAttributeIncluded({ tag: node.name, attribute: attribute.name, config }))
continue;
if (isString(attribute.value)) {
@ -45,16 +44,14 @@ function replaceMdastStrings({
const estree = attribute.value.data.estree;
eswalk(estree, {
SimpleLiteral(esnode, _2) {
if (isString(esnode.value))
esnode.value = strings.pop();
if (isString(esnode.value)) esnode.value = strings.pop();
}
});
}
}
} else {
for (const attribute of node.attributes) {
if (!mdNodeIs(attribute, "mdxJsxAttribute"))
continue;
if (!mdNodeIs(attribute, "mdxJsxAttribute")) continue;
const componentName = node.name;
const isAttributeIncluded = isJsxComponentAttributeIncluded({
name: componentName,
@ -62,8 +59,7 @@ function replaceMdastStrings({
config
});
if (isString(attribute.value)) {
if (!isAttributeIncluded)
continue;
if (!isAttributeIncluded) continue;
attribute.value = strings.pop();
} else if (attribute.value?.data?.estree) {
if (!config.jsxComponents.include[componentName] || !config.jsxComponents.include[componentName].attributes.some(
@ -73,8 +69,7 @@ function replaceMdastStrings({
const estree = attribute.value.data.estree;
eswalk(estree, {
SimpleLiteral(esnode, _2) {
if (isString(esnode.value))
esnode.value = strings.pop();
if (isString(esnode.value)) esnode.value = strings.pop();
},
JSXElement(esnode, _2) {
const name = esnode.openingElement.name.name;
@ -100,8 +95,7 @@ function replaceMdastStrings({
esnode.value = strings.pop();
},
Property(esnode, parents) {
if (!esNodeIs(esnode, "Identifier"))
return false;
if (!esNodeIs(esnode, "Identifier")) return false;
const propertyPath = resolveEstreePropertyPath(esnode, parents, attribute.name);
if (!propertyPath || !isJsxComponentAttributeIncluded({
name: componentName,
@ -116,14 +110,11 @@ function replaceMdastStrings({
}
}
if (mdNodeIs(node, "yaml")) {
if (isEmptyArray(config.frontmatterFields.include))
return;
if (isEmptyString(node.value))
return;
if (isEmptyArray(config.frontmatterFields.include)) return;
if (isEmptyString(node.value)) return;
const object = parseYaml(node.value);
for (const field in object) {
if (!isFrontmatterFieldIncluded({ field, config }))
continue;
if (!isFrontmatterFieldIncluded({ field, config })) continue;
const value = object[field];
if (isString(value)) {
object[field] = strings.pop();
@ -131,8 +122,7 @@ function replaceMdastStrings({
}
if (isArray(value)) {
for (const [index, item] of value.entries()) {
if (!isString(item))
continue;
if (!isString(item)) continue;
value[index] = strings.pop();
}
}
@ -141,25 +131,20 @@ function replaceMdastStrings({
}
},
(node, parent) => {
if (!isMarkdownNodeIncluded({ type: node.type, config }))
return false;
if (!isMarkdownNodeIncluded({ type: node.type, config })) return false;
if (parent && mdNodeIsJsxElement(parent) && parent.name) {
if (isHtmlTag(parent.name)) {
if (!isHtmlElementChildrenIncluded({ tag: parent.name, config }))
return false;
if (!isHtmlElementChildrenIncluded({ tag: parent.name, config })) return false;
} else {
if (!isJsxComponentChildrenIncluded({ name: parent.name, config }))
return false;
if (!isJsxComponentChildrenIncluded({ name: parent.name, config })) return false;
}
return true;
}
if (mdNodeIsJsxElement(node) && node.name) {
if (isHtmlTag(node.name)) {
if (!isHtmlElementIncluded({ tag: node.name, config }))
return false;
if (!isHtmlElementIncluded({ tag: node.name, config })) return false;
} else {
if (!isJsxComponentIncluded({ name: node.name, config }))
return false;
if (!isJsxComponentIncluded({ name: node.name, config })) return false;
}
return true;
}
@ -174,8 +159,7 @@ function replaceJsonOrYamlStrings({
strings,
config
}) {
if (isEmptyArray(config.jsonOrYamlProperties.include))
return source;
if (isEmptyArray(config.jsonOrYamlProperties.include)) return source;
strings = strings.reverse();
const parsed = type === "json" ? JSON.parse(source) : parseYaml(source);
process({ value: parsed });
@ -212,8 +196,7 @@ function replaceJsonOrYamlStrings({
return;
}
}
if (type === "json")
return JSON.stringify(parsed);
if (type === "json") return JSON.stringify(parsed);
return stringifyYaml(parsed);
}
export {

7
dist/translate.d.ts vendored
View File

@ -1,7 +1,6 @@
import type { SourceLanguageCode, TargetLanguageCode } from 'deepl-node';
import type { SourceLanguageCode, TargetLanguageCode, TranslateTextOptions } from 'deepl-node';
/**
* Translate an array of strings from sourceLang to targetLang using DeepL.
* Batches requests in groups of 10.
* Requires DEEPL_AUTH_KEY environment variable.
* Batches requests and retries on rate-limit (429) or server (5xx) errors.
*/
export declare function translateStrings(strings: string[], sourceLang: SourceLanguageCode, targetLang: TargetLanguageCode): Promise<string[]>;
export declare function translateStrings(strings: string[], sourceLang: SourceLanguageCode, targetLang: TargetLanguageCode, apiKey?: string, deeplOptions?: TranslateTextOptions, batchSize?: number): Promise<string[]>;

58
dist/translate.js vendored
View File

@ -1,35 +1,41 @@
import { Translator } from "deepl-node";
async function translateStrings(strings, sourceLang, targetLang) {
if (strings.length === 0)
return [];
const DEEPL_AUTH_KEY = process.env.DEEPL_AUTH_KEY;
if (!DEEPL_AUTH_KEY)
throw new Error("DEEPL_AUTH_KEY environment variable must be set");
const deepl = new Translator(DEEPL_AUTH_KEY);
const DEFAULT_BATCH_SIZE = 50;
const MAX_RETRIES = 3;
async function translateStrings(strings, sourceLang, targetLang, apiKey, deeplOptions, batchSize = DEFAULT_BATCH_SIZE) {
if (strings.length === 0) return [];
const key = apiKey ?? process.env.DEEPL_AUTH_KEY;
if (!key) throw new Error("DeepL API key must be provided via options.apiKey or DEEPL_AUTH_KEY environment variable");
const deepl = new Translator(key);
const translations = new Array(strings.length).fill("");
const queue = [];
for (const [index, string] of strings.entries()) {
queue.push([index, string]);
if (index === strings.length - 1 || queue.length === 10) {
const indexes = queue.map(([i]) => i);
const batch = queue.map(([, s]) => s);
const results = await deepl.translateText(
batch,
sourceLang,
targetLang,
{
tagHandling: "html",
splitSentences: "nonewlines"
}
);
for (let j = 0; j < indexes.length; j++) {
translations[indexes[j]] = results[j].text;
}
queue.length = 0;
const textOptions = {
tagHandling: "html",
splitSentences: "nonewlines",
...deeplOptions
};
for (let i = 0; i < strings.length; i += batchSize) {
const batch = strings.slice(i, i + batchSize);
const results = await retry(
() => deepl.translateText(batch, sourceLang, targetLang, textOptions)
);
for (let j = 0; j < batch.length; j++) {
translations[i + j] = results[j].text;
}
}
return translations;
}
/**
 * Invoke `fn` and retry it with exponential backoff when it fails with a
 * transient HTTP error.
 *
 * An error is treated as transient when its `statusCode` (falling back to
 * `status`) is 429 (rate limit) or in the 5xx range. Every other error is
 * rethrown immediately, including 456: DeepL documents 456 as "quota
 * exceeded", which waiting a few seconds cannot resolve, and the declared
 * contract of `translateStrings` only promises retries for 429 and 5xx.
 *
 * NOTE(review): this relies on the thrown error exposing a numeric
 * `statusCode` or `status` property. Confirm the DeepL client's errors do;
 * if they do not, `status` is undefined and nothing is ever retried.
 *
 * @param fn - Zero-argument function returning the promise to await.
 * @param retries - Maximum number of retries after the first attempt, so
 *   `fn` is called at most `retries + 1` times.
 * @returns The value `fn` resolves to on its first successful attempt.
 * @throws The last error from `fn` when it is not retryable or the retry
 *   budget is exhausted.
 */
async function retry(fn, retries = MAX_RETRIES) {
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (err) {
      const status = err?.statusCode ?? err?.status;
      // Comparisons against an undefined status are all false, so errors
      // that carry no status code are never retried.
      const retryable = status === 429 || (status >= 500 && status < 600);
      if (!retryable || attempt >= retries) throw err;
      // Backoff of 1s, 2s, 4s, ... capped at 10s per wait.
      const delay = Math.min(1e3 * 2 ** attempt, 1e4);
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
export {
translateStrings
};

View File

@ -58,8 +58,7 @@ const tokenize = (effects, ok, nok) => {
return nok(code);
}
function inside(code) {
if (code === codes.eof)
return nok(code);
if (code === codes.eof) return nok(code);
if (markdownLineEnding(code)) {
effects.exit(types.data);
return atLineEnding(code);
@ -86,8 +85,7 @@ const tokenize = (effects, ok, nok) => {
return factorySpace(effects, afterLinePrefix, types.linePrefix);
}
function afterLinePrefix(code) {
if (markdownLineEnding(code))
return atLineEnding(code);
if (markdownLineEnding(code)) return atLineEnding(code);
effects.enter(types.data);
return inside(code);
}

3000
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,9 @@
{
"name": "deepmark",
"name": "@polymech/deepl-mark",
"description": "Translate markdown files correctly with `mdast` and DeepL.",
"version": "0.2.0",
"version": "0.3.0",
"license": "MIT",
"author": "Izzuddin Natsir",
"author": "Izzuddin Natsir | Polymech",
"type": "module",
"files": [
"dist/*"
@ -26,7 +26,7 @@
"acorn": "^8.8.2",
"acorn-jsx": "^5.3.2",
"astring": "^1.8.4",
"deepl-node": "^1.8.0",
"deepl-node": "^1.24.0",
"mdast-util-from-markdown": "^1.3.0",
"mdast-util-frontmatter": "^1.0.1",
"mdast-util-gfm-table": "^1.0.7",
@ -43,13 +43,13 @@
"yaml": "^2.2.1"
},
"devDependencies": {
"@types/node": "^25.3.3",
"@types/estree": "^1.0.0",
"@types/mdast": "^3.0.10",
"@types/node": "^18.11.19",
"@types/prettier": "^2.7.2",
"@types/unist": "^2.0.6",
"esbuild": "^0.17.5",
"typescript": "^5.0.0",
"vitest": "^0.28.4"
"esbuild": "^0.25.0",
"typescript": "^5.9.3",
"vitest": "^3.0.0"
}
}
}

View File

@ -1,153 +0,0 @@
// Vitest Snapshot v1
exports[`resolve paths > resolve source and output directories absolute paths 1`] = `
[
"/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$",
"/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs",
"/home/izznatsir/Codes/github/izznatsir/deepmark/example/blog",
]
`;
exports[`resolve paths > resolve source and output directories absolute paths 2`] = `
[
"/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$",
"/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs",
"/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/blog",
]
`;
exports[`resolve paths > resolve source file absolute paths 1`] = `
{
"json": [
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/code.json",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/en/code.json",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docusaurus-plugin-content-blog/options.json",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/en/docusaurus-plugin-content-blog/options.json",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docusaurus-plugin-content-docs/current.json",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/en/docusaurus-plugin-content-docs/current.json",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docusaurus-theme-classic/footer.json",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/en/docusaurus-theme-classic/footer.json",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docusaurus-theme-classic/navbar.json",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/en/docusaurus-theme-classic/navbar.json",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-basics/_category_.json",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-basics/_category_.json",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-extras/_category_.json",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-extras/_category_.json",
},
],
"md": [
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/intro.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/intro.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-basics/congratulations.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-basics/congratulations.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-basics/create-a-blog-post.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-basics/create-a-blog-post.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-basics/create-a-document.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-basics/create-a-document.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-basics/create-a-page.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-basics/create-a-page.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-basics/deploy-your-site.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-basics/deploy-your-site.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-basics/markdown-features.mdx",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-basics/markdown-features.mdx",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-extras/manage-docs-versions.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-extras/manage-docs-versions.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-extras/translate-your-site.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-extras/translate-your-site.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/blog/2019-05-28-first-blog-post.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/blog/2019-05-28-first-blog-post.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/blog/2019-05-29-long-blog-post.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/blog/2019-05-29-long-blog-post.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/blog/2021-08-01-mdx-blog-post.mdx",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/blog/2021-08-01-mdx-blog-post.mdx",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/blog/2021-08-26-welcome/index.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/blog/2021-08-26-welcome/index.md",
},
],
"others": [
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-extras/img/docsVersionDropdown.png",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-extras/img/docsVersionDropdown.png",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/docs/tutorial-extras/img/localeDropdown.png",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/docs/tutorial-extras/img/localeDropdown.png",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg",
},
],
"yaml": [
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/blog/authors.yml",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/blog/authors.yml",
},
],
}
`;
exports[`resolve paths with files include and exclude patterns > resolve source file absolute paths 1`] = `
{
"json": [],
"md": [
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/blog/2019-05-28-first-blog-post.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/blog/2019-05-28-first-blog-post.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/blog/2019-05-29-long-blog-post.md",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/blog/2019-05-29-long-blog-post.md",
},
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/blog/2021-08-01-mdx-blog-post.mdx",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/blog/2021-08-01-mdx-blog-post.mdx",
},
],
"others": [],
"yaml": [
{
"outputFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/i18n/$langcode$/blog/authors.yml",
"sourceFilePath": "/home/izznatsir/Codes/github/izznatsir/deepmark/example/blog/authors.yml",
},
],
}
`;

View File

@ -1,4 +1,4 @@
// Vitest Snapshot v1
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
exports[`extract frontmatter field string values > filter frontmatter fields based on configuration 1`] = `
[

View File

@ -8,7 +8,7 @@ export async function format(markdown: string) {
* to avoid unnecessary linebreaks that break translation result
*/
const mdast = getMdast(
prettier.format(markdown, {
await prettier.format(markdown, {
parser: 'mdx',
printWidth: Infinity,
proseWrap: 'never',

View File

@ -1,11 +1,22 @@
import type { SourceLanguageCode, TargetLanguageCode } from 'deepl-node';
import type { SourceLanguageCode, TargetLanguageCode, TranslateTextOptions } from 'deepl-node';
import { getMarkdown, getMdast } from './ast/mdast.js';
import type { UserConfig } from './config.js';
import { resolveConfig } from './config.js';
import { extractMdastStrings } from './extract.js';
import { format } from './format.js';
import { replaceMdastStrings } from './replace.js';
import { translateStrings } from './translate.js';
/**
 * Optional settings accepted by `translate`.
 *
 * Any `UserConfig` field may be supplied except `sourceLanguage`,
 * `outputLanguages` and `directories`, which are excluded from this type.
 * Two DeepL-specific fields are added on top.
 */
export type TranslateOptions = {
	/** Options handed to DeepL for each request (tagHandling, splitSentences, formality, glossaryId, etc.) */
	deeplOptions?: TranslateTextOptions;
	/** DeepL API key. When omitted, the `DEEPL_AUTH_KEY` env var is used instead. */
	apiKey?: string;
} & Omit<UserConfig, 'directories' | 'outputLanguages' | 'sourceLanguage'>;
/**
* Translate markdown/MDX content from one language to another using DeepL.
*
@ -14,6 +25,7 @@ import { translateStrings } from './translate.js';
* @param content - Markdown or MDX string to translate
* @param sourceLang - Source language code (e.g. 'en', 'de', 'fr')
* @param targetLang - Target language code (e.g. 'de', 'en-US', 'fr')
* @param options - Optional config to control extraction (frontmatter, jsx, html, etc.)
* @returns Translated markdown string
*
* @example
@ -27,13 +39,16 @@ import { translateStrings } from './translate.js';
export async function translate(
content: string,
sourceLang: SourceLanguageCode,
targetLang: TargetLanguageCode
targetLang: TargetLanguageCode,
options?: TranslateOptions
): Promise<string> {
// Build a default config suitable for general markdown translation
const { apiKey, deeplOptions, ...configOptions } = options ?? {};
const config = resolveConfig({
sourceLanguage: sourceLang,
outputLanguages: [targetLang],
directories: [['', '']]
directories: [['', '']],
...configOptions
});
// Format, parse, extract translatable strings
@ -44,11 +59,11 @@ export async function translate(
if (strings.length === 0) return content;
// Translate via DeepL
const translated = await translateStrings(strings, sourceLang, targetLang);
const translated = await translateStrings(strings, sourceLang, targetLang, apiKey, deeplOptions);
// Replace strings in the AST and serialize back to markdown
const result = replaceMdastStrings({ mdast, strings: translated, config });
return getMarkdown(result);
}
export type { SourceLanguageCode, TargetLanguageCode } from 'deepl-node';
export type { SourceLanguageCode, TargetLanguageCode, TranslateTextOptions } from 'deepl-node';

View File

@ -1,49 +1,62 @@
import type { SourceLanguageCode, TargetLanguageCode } from 'deepl-node';
import type { SourceLanguageCode, TargetLanguageCode, TranslateTextOptions } from 'deepl-node';
import { Translator } from 'deepl-node';
const DEFAULT_BATCH_SIZE = 50;
const MAX_RETRIES = 3;
/**
* Translate an array of strings from sourceLang to targetLang using DeepL.
*
* NOTE(review): this listing carries superseded lines (environment-only key,
* queue-based batches of 10) next to their replacements (apiKey argument,
* slice-based batches of `batchSize`, `retry` wrapper) without change markers.
* The description below follows the replacement lines.
*
* Requests are sent in batches and each request goes through `retry`.
* An empty input returns `[]` before any API key lookup.
*
* NOTE(review): a `batchSize` of 0 or less never advances the slice loop index;
* callers should pass a positive value.
*
* @param strings - Texts to translate; results keep the same order.
* @param sourceLang - Language of the input strings.
* @param targetLang - Language to translate into.
* @param apiKey - DeepL API key; when omitted, `process.env.DEEPL_AUTH_KEY` is used.
* @param deeplOptions - DeepL text options spread over the defaults `tagHandling: 'html'` and `splitSentences: 'nonewlines'`.
* @param batchSize - Number of strings per request; defaults to `DEFAULT_BATCH_SIZE`.
* @returns One translated string per input string.
* @throws Error when no API key is available.
*/
export async function translateStrings(
strings: string[],
sourceLang: SourceLanguageCode,
targetLang: TargetLanguageCode
targetLang: TargetLanguageCode,
apiKey?: string,
deeplOptions?: TranslateTextOptions,
batchSize: number = DEFAULT_BATCH_SIZE
): Promise<string[]> {
// Nothing to translate: return early without requiring a key.
if (strings.length === 0) return [];
const DEEPL_AUTH_KEY = process.env.DEEPL_AUTH_KEY;
if (!DEEPL_AUTH_KEY) throw new Error('DEEPL_AUTH_KEY environment variable must be set');
// An explicit apiKey wins; the environment variable is only the fallback.
const key = apiKey ?? process.env.DEEPL_AUTH_KEY;
if (!key) throw new Error('DeepL API key must be provided via options.apiKey or DEEPL_AUTH_KEY environment variable');
const deepl = new Translator(DEEPL_AUTH_KEY);
const deepl = new Translator(key);
// Pre-sized result array so each translation can be written at its input position.
const translations: string[] = new Array(strings.length).fill('');
const queue: [index: number, string: string][] = [];
for (const [index, string] of strings.entries()) {
queue.push([index, string]);
// Defaults come first so that caller-supplied deeplOptions override them.
const textOptions: TranslateTextOptions = {
tagHandling: 'html',
splitSentences: 'nonewlines',
...deeplOptions
};
if (index === strings.length - 1 || queue.length === 10) {
const indexes = queue.map(([i]) => i);
const batch = queue.map(([, s]) => s);
// Walk the input in consecutive windows of batchSize strings.
for (let i = 0; i < strings.length; i += batchSize) {
const batch = strings.slice(i, i + batchSize);
const results = await deepl.translateText(
batch,
sourceLang,
targetLang,
{
tagHandling: 'html',
splitSentences: 'nonewlines'
}
);
// Each batch request is issued through retry().
const results = await retry(() =>
deepl.translateText(batch, sourceLang, targetLang, textOptions)
);
for (let j = 0; j < indexes.length; j++) {
translations[indexes[j]] = results[j].text;
}
queue.length = 0;
// i is the batch's offset into `strings`, so i + j is the original position.
for (let j = 0; j < batch.length; j++) {
translations[i + j] = results[j].text;
}
}
return translations;
}
/**
 * Run `fn`, retrying with exponential backoff while it keeps failing with a
 * retryable HTTP status.
 *
 * The status is read from the thrown value's `statusCode` property, falling
 * back to `status`. Numbers and numeric strings are treated alike. Statuses
 * 429, 456 and any 5xx are retried; everything else is rethrown at once.
 * The wait before the next attempt is `1000 * 2 ** attempt` ms, capped at 10 s.
 *
 * NOTE(review): deepl-node appears to retry internally and its error classes
 * may not expose `statusCode`/`status` — confirm this wrapper actually fires
 * for errors thrown by `Translator.translateText`.
 *
 * @param fn - Operation to run; invoked once per attempt.
 * @param retries - Maximum number of retries after the first attempt.
 * @returns The value `fn` resolves with.
 * @throws The error from `fn` when it is not retryable or retries are used up.
 */
async function retry<T>(fn: () => Promise<T>, retries = MAX_RETRIES): Promise<T> {
	for (let attempt = 0; ; attempt++) {
		try {
			return await fn();
		} catch (err: unknown) {
			// Only object-like errors can carry a status; primitives are never retryable.
			let raw: unknown;
			if (typeof err === 'object' && err !== null) {
				const carrier = err as { statusCode?: unknown; status?: unknown };
				raw = carrier.statusCode ?? carrier.status;
			}

			// Normalise numeric strings so "429" and "503" are classified like 429 and 503.
			const status = typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : raw;
			const retryable =
				typeof status === 'number' &&
				(status === 429 || status === 456 || (status >= 500 && status < 600));

			if (!retryable || attempt >= retries) throw err;

			// Exponential backoff: 1 s, 2 s, 4 s, ... never more than 10 s.
			const delay = Math.min(1000 * 2 ** attempt, 10_000);
			await new Promise((resolve) => setTimeout(resolve, delay));
		}
	}
}

View File

@ -1,7 +1,17 @@
import { readFileSync } from 'fs';
import np from 'path';
const CWD = process.cwd();
// Best-effort: copy KEY=value pairs from a local .env file into process.env
// so tests can read them. Variables that are already set are left untouched,
// and a missing or unreadable file is silently ignored.
try {
	const assignment = /^\s*([\w.-]+)\s*=\s*"?([^"]*)"?\s*$/;
	const envFile = np.resolve(CWD, '.env');
	for (const entry of readFileSync(envFile, 'utf-8').split('\n')) {
		const parsed = assignment.exec(entry);
		if (parsed === null) continue;
		// Group 1 is the variable name, group 2 the value without surrounding double quotes.
		process.env[parsed[1]] ??= parsed[2];
	}
} catch { }
/** @type { import('vite').UserConfig } */
export default {
resolve: {