Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .changeset/blue-pens-swim.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
"@lingo.dev/_spec": minor
"lingo.dev": minor
---

Add support for locked patterns in MDX loader

This change adds support for preserving specific patterns in MDX files during translation, including:

- !params syntax for parameter documentation
- !! parameter_name headings
- !type declarations
- !required flags
- !values lists

The implementation adds a new config version 1.7 with a "lockedPatterns" field that accepts an array of regex patterns; any content matching one of these patterns is preserved as-is during translation.
12 changes: 11 additions & 1 deletion i18n.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": 1.2,
"version": 1.7,
"locale": {
"source": "en",
"targets": [
Expand All @@ -23,6 +23,16 @@
"include": [
"readme/[locale].md"
]
},
"mdx": {
"include": [],
"lockedPatterns": [
"!params",
"!! [\\w_]+",
"!type [\\w<>\\[\\]\"',]+",
"!required",
"!values [\\s\\S]*?(?=\\n\\n|$)"
]
}
},
"$schema": "https://lingo.dev/schema/i18n.json"
Expand Down
2 changes: 2 additions & 0 deletions packages/cli/src/cli/loaders/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import createMdxFrontmatterSplitLoader from "./mdx2/frontmatter-split";
import createMdxCodePlaceholderLoader from "./mdx2/code-placeholder";
import createLocalizableMdxDocumentLoader from "./mdx2/localizable-document";
import createMdxSectionsSplit2Loader from "./mdx2/sections-split-2";
import createMdxLockedPatternsLoader from "./mdx2/locked-patterns";

type BucketLoaderOptions = {
isCacheRestore: boolean;
Expand Down Expand Up @@ -119,6 +120,7 @@ export default function createBucketLoader(
bucketPathPattern,
}),
createMdxCodePlaceholderLoader(),
createMdxLockedPatternsLoader(),
createMdxFrontmatterSplitLoader(),
createMdxSectionsSplit2Loader(),
createLocalizableMdxDocumentLoader(),
Expand Down
233 changes: 233 additions & 0 deletions packages/cli/src/cli/loaders/mdx2/locked-patterns.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
import { describe, it, expect } from "vitest";
import createMdxLockedPatternsLoader from "./locked-patterns";
import dedent from "dedent";

// Round-trip tests for the locked-patterns loader: pull() is expected to swap
// locked syntax for `---LOCKED-PATTERN-<hex>---` placeholders, and push() is
// expected to restore the original text around the translated content.
describe("MDX Locked Patterns Loader", () => {
describe("Basic functionality", () => {
it("should preserve content matching patterns", async () => {
const loader = createMdxLockedPatternsLoader();
loader.setDefaultLocale("en");

const md = dedent`
# Title

Some content.

!params

!! parameter_name

!type string
`;

const result = await loader.pull("en", md);

// Placeholder suffixes are hex digests, so count them with a regex instead of
// hard-coding the exact tokens.
const placeholderRegex = /---LOCKED-PATTERN-[0-9a-f]+---/g;
const placeholders = result.content.match(placeholderRegex) || [];
expect(placeholders.length).toBe(3); // Three patterns should be replaced

// Normalize every placeholder to a fixed token so the pulled content can be
// compared against a literal expectation.
const sanitizedContent = result.content
.replace(placeholderRegex, "---PLACEHOLDER---");

const expectedSanitized = dedent`
# Title

Some content.

---PLACEHOLDER---

---PLACEHOLDER---

---PLACEHOLDER---
`;

expect(sanitizedContent).toBe(expectedSanitized);

// Simulate translation: only the prose changes, placeholders stay untouched.
const translated = {
...result,
content: result.content
.replace("# Title", "# Título")
.replace("Some content.", "Algún contenido.")
};

const pushed = await loader.push("es", translated);

// After push the locked lines must reappear verbatim next to the translated prose.
const expectedPushed = dedent`
# Título

Algún contenido.

!params

!! parameter_name

!type string
`;

expect(pushed).toBe(expectedPushed);
});
});

describe("Real-world patterns", () => {
// NOTE(review): despite the test name, the fixture below contains no `!hover`
// marker; as written it only verifies that a plain code block produces no
// placeholders and round-trips unchanged — confirm this is the intended coverage.
it("should handle !hover syntax in code blocks", async () => {
const loader = createMdxLockedPatternsLoader();
loader.setDefaultLocale("en");

const md = dedent`
\`\`\`js
const x = 1;
const pubkey = "vines1vzrYbzLMRdu58ou5XTby4qAqVRLmqo36NKPTg";
\`\`\`
`;

const result = await loader.pull("en", md);

const placeholderRegex = /---LOCKED-PATTERN-[0-9a-f]+---/g;
const placeholders = result.content.match(placeholderRegex) || [];
expect(placeholders.length).toBe(0); // No patterns should be replaced

// Pushing the untouched pull result must give back the original document.
const pushed = await loader.push("es", result);

expect(pushed).toBe(md);
});

it("should handle !! parameter headings", async () => {
const loader = createMdxLockedPatternsLoader();
loader.setDefaultLocale("en");

const md = dedent`
# Parameters

!! pubkey

The public key of the account to query.

!! encoding

Encoding format for the returned Account data.
`;

const result = await loader.pull("en", md);

const placeholderRegex = /---LOCKED-PATTERN-[0-9a-f]+---/g;
const placeholders = result.content.match(placeholderRegex) || [];
expect(placeholders.length).toBe(2); // Two patterns should be replaced

// Normalize placeholders so the comparison does not depend on the digests.
const sanitizedContent = result.content
.replace(placeholderRegex, "---PLACEHOLDER---");

const expectedSanitized = dedent`
# Parameters

---PLACEHOLDER---

The public key of the account to query.

---PLACEHOLDER---

Encoding format for the returned Account data.
`;

expect(sanitizedContent).toBe(expectedSanitized);

// Simulate translation of the prose only; the `!!` headings are hidden behind
// placeholders at this point.
const translated = {
...result,
content: result.content
.replace("# Parameters", "# Parámetros")
.replace("The public key of the account to query.", "La clave pública de la cuenta a consultar.")
.replace("Encoding format for the returned Account data.", "Formato de codificación para los datos de la cuenta devueltos.")
};

const pushed = await loader.push("es", translated);

const expectedPushed = dedent`
# Parámetros

!! pubkey

La clave pública de la cuenta a consultar.

!! encoding

Formato de codificación para los datos de la cuenta devueltos.
`;

expect(pushed).toBe(expectedPushed);
});

it("should handle !type, !required, and !values declarations", async () => {
const loader = createMdxLockedPatternsLoader();
loader.setDefaultLocale("en");

const md = dedent`
!! pubkey

!type string
!required

The public key of the account to query.

!! encoding

!type string
!values "base58" (default), "base64", "jsonParsed"

Encoding format for the returned Account data.
`;

const result = await loader.pull("en", md);

// Expected matches: two `!!` headings, two `!type` lines, one `!required`
// and one `!values` line.
const placeholderRegex = /---LOCKED-PATTERN-[0-9a-f]+---/g;
const placeholders = result.content.match(placeholderRegex) || [];
expect(placeholders.length).toBe(6); // Six patterns should be replaced

const sanitizedContent = result.content
.replace(placeholderRegex, "---PLACEHOLDER---");

const expectedSanitized = dedent`
---PLACEHOLDER---

---PLACEHOLDER---
---PLACEHOLDER---

The public key of the account to query.

---PLACEHOLDER---

---PLACEHOLDER---
---PLACEHOLDER---

Encoding format for the returned Account data.
`;

expect(sanitizedContent).toBe(expectedSanitized);

// Translate the two descriptive sentences; everything else is locked.
const translated = {
...result,
content: result.content
.replace("The public key of the account to query.", "La clave pública de la cuenta a consultar.")
.replace("Encoding format for the returned Account data.", "Formato de codificación para los datos de la cuenta devueltos.")
};

const pushed = await loader.push("es", translated);

const expectedPushed = dedent`
!! pubkey

!type string
!required

La clave pública de la cuenta a consultar.

!! encoding

!type string
!values "base58" (default), "base64", "jsonParsed"

Formato de codificación para los datos de la cuenta devueltos.
`;

expect(pushed).toBe(expectedPushed);
});
});
});
87 changes: 87 additions & 0 deletions packages/cli/src/cli/loaders/mdx2/locked-patterns.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { ILoader } from "../_types";
import { createLoader } from "../_utils";
import { md5 } from "../../utils/md5";
import _ from "lodash";
import { I18nConfig } from "@lingo.dev/_spec";

/**
* Extracts content matching regex patterns and replaces it with placeholders.
* Returns the transformed content and a mapping of placeholders to original content.
*/
/**
 * Replaces every span of `content` that matches one of `patterns` with a
 * `---LOCKED-PATTERN-<md5>---` placeholder.
 *
 * Patterns are applied in the order given, each compiled with the `g` and `m`
 * flags. Later patterns run against the output of earlier ones.
 *
 * @param content - Text to scan.
 * @param patterns - Regular-expression sources. A source that fails to compile
 *   is reported with `console.warn` and skipped.
 * @returns The rewritten text and a map from each placeholder to the exact
 *   text it replaced. Identical matched text always yields the same placeholder.
 */
function extractLockedPatterns(
  content: string,
  patterns: string[] = []
): {
  content: string;
  lockedPlaceholders: Record<string, string>;
} {
  const lockedPlaceholders: Record<string, string> = {};

  if (!patterns || patterns.length === 0) {
    return { content, lockedPlaceholders };
  }

  let finalContent = content;

  for (const patternStr of patterns) {
    let pattern: RegExp;
    try {
      pattern = new RegExp(patternStr, "gm");
    } catch {
      // Only regex compilation is guarded, so the warning is accurate.
      console.warn(`Invalid regex pattern: ${patternStr}`);
      continue;
    }

    // Substitute at the matched position itself. Replacing by matched *text*
    // would rewrite the first textual occurrence, which is the wrong span for
    // anchored or lookaround patterns (e.g. `^!required` when "!required" also
    // appears mid-line earlier in the document).
    finalContent = finalContent.replace(pattern, (matchedText: string) => {
      // A zero-length match has nothing to protect; leave the text untouched
      // instead of injecting a placeholder for the empty string.
      if (matchedText === "") {
        return matchedText;
      }

      const placeholder = `---LOCKED-PATTERN-${md5(matchedText)}---`;
      lockedPlaceholders[placeholder] = matchedText;
      return placeholder;
    });
  }

  return {
    content: finalContent,
    lockedPlaceholders,
  };
}

/**
 * MDX text whose locked spans have been swapped for placeholders, together
 * with the lookup table needed to put the original spans back.
 */
export interface MdxWithLockedPatterns {
  /** Placeholder token -> the original text that the token stands in for. */
  lockedPlaceholders: Record<string, string>;
  /** Document text with placeholder tokens in place of the locked spans. */
  content: string;
}

/**
 * Creates a loader that hides "locked" MDX syntax from later processing.
 *
 * - `pull` runs `extractLockedPatterns` over the input with a fixed pattern
 *   list and returns the rewritten text plus the placeholder -> original map.
 * - `push` takes that same shape and substitutes every placeholder in
 *   `content` with its original text, returning a plain string.
 *
 * NOTE(review): the pattern list is hard-coded here rather than read from
 * configuration — confirm whether the "lockedPatterns" config field is meant
 * to feed this loader.
 */
export default function createMdxLockedPatternsLoader(): ILoader<
string,
MdxWithLockedPatterns
> {
return createLoader({
async pull(locale, input) {
// Regex sources handed to extractLockedPatterns, applied in this order.
// NOTE(review): the last entry looks redundant — any text it matches also
// begins with "// !hover" and would already be consumed by the entry above it.
const patterns = [
"!params",
"!! [\\w_]+",
"!type [\\w<>\\[\\]\"',]+",
"!required",
"!values [\\s\\S]*?(?=\\n\\n|$)",
"// !hover[\\s\\S]*?(?=\\n|$)",
"// !hover\\([\\d:]+\\)[\\s\\S]*?(?=\\n|$)"
];
Comment thread
maxprilutskiy marked this conversation as resolved.
Outdated

// A falsy input (e.g. null/undefined/empty) is treated as an empty document.
const { content, lockedPlaceholders } = extractLockedPatterns(input || "", patterns);

return {
content,
lockedPlaceholders,
};
},

async push(locale, data) {
// Restore every occurrence of each placeholder using the map carried in `data`.
let result = data.content;
for (const [placeholder, original] of Object.entries(data.lockedPlaceholders)) {
result = result.replaceAll(placeholder, original);
}

return result;
},
});
Comment on lines +59 to +78
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. should return string, not obkect
  2. in push, use pullInput +extractLockedPatterns again to recreate the lockedPlaceholders map

}
Loading