-
Notifications
You must be signed in to change notification settings - Fork 834
feat: add support for locked patterns in mdx loader #700
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 3 commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
ac6f3c1
feat: add support for locked patterns in mdx loader
devin-ai-integration[bot] 6c7d59a
test: update locked patterns tests to use toBe instead of toMatch/toC…
devin-ai-integration[bot] 1d725a3
fix(mdx): fix locked patterns tests and improve regex patterns
devin-ai-integration[bot] e4255f9
fix: make locked patterns configurable via i18n.json
devin-ai-integration[bot] bd4443b
refactor: remove default patterns fallback in locked patterns loader
devin-ai-integration[bot] 2df97ee
refactor: update locked patterns loader to return string and use pull…
devin-ai-integration[bot] 365e64b
refactor: update locked patterns loader to properly use pullInput par…
devin-ai-integration[bot] a2ed791
Merge branch 'main' into devin/1746209518-mdx-locked-patterns
maxprilutskiy File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| --- | ||
| "@lingo.dev/_spec": minor | ||
| "lingo.dev": minor | ||
| --- | ||
|
|
||
| Add support for locked patterns in MDX loader | ||
|
|
||
| This change adds support for preserving specific patterns in MDX files during translation, including: | ||
|
|
||
| - !params syntax for parameter documentation | ||
| - !! parameter_name headings | ||
| - !type declarations | ||
| - !required flags | ||
| - !values lists | ||
|
|
||
| The implementation adds a new config version 1.7 with a "lockedPatterns" field that accepts an array of regex patterns to be preserved during translation. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
233 changes: 233 additions & 0 deletions
233
packages/cli/src/cli/loaders/mdx2/locked-patterns.spec.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,233 @@ | ||
| import { describe, it, expect } from "vitest"; | ||
| import createMdxLockedPatternsLoader from "./locked-patterns"; | ||
| import dedent from "dedent"; | ||
|
|
||
| describe("MDX Locked Patterns Loader", () => { | ||
| describe("Basic functionality", () => { | ||
it("should preserve content matching patterns", async () => {
  const lockedMarker = /---LOCKED-PATTERN-[0-9a-f]+---/g;

  const mdxLoader = createMdxLockedPatternsLoader();
  mdxLoader.setDefaultLocale("en");

  const source = dedent`
    # Title

    Some content.

    !params

    !! parameter_name

    !type string
  `;

  // Pull: each of the three directives must be swapped for a placeholder.
  const pulled = await mdxLoader.pull("en", source);
  const markersFound = pulled.content.match(lockedMarker) || [];
  expect(markersFound.length).toBe(3); // Three patterns should be replaced

  // Placeholder names embed a hash, so normalise them before comparing text.
  const normalised = pulled.content.replace(lockedMarker, "---PLACEHOLDER---");
  expect(normalised).toBe(dedent`
    # Title

    Some content.

    ---PLACEHOLDER---

    ---PLACEHOLDER---

    ---PLACEHOLDER---
  `);

  // Simulate a translator that only touches the unlocked text.
  const translatedContent = pulled.content
    .replace("# Title", "# Título")
    .replace("Some content.", "Algún contenido.");

  // Push: placeholders must be expanded back to the original directives.
  const restored = await mdxLoader.push("es", {
    ...pulled,
    content: translatedContent,
  });

  expect(restored).toBe(dedent`
    # Título

    Algún contenido.

    !params

    !! parameter_name

    !type string
  `);
});
| }); | ||
|
|
||
| describe("Real-world patterns", () => { | ||
it("should handle !hover syntax in code blocks", async () => {
  const loader = createMdxLockedPatternsLoader();
  loader.setDefaultLocale("en");

  // The fixture must actually contain a `// !hover` annotation; without one
  // the hover patterns are never exercised and the test passes vacuously.
  const md = dedent`
    \`\`\`js
    const x = 1;
    // !hover pubkey
    const pubkey = "vines1vzrYbzLMRdu58ou5XTby4qAqVRLmqo36NKPTg";
    \`\`\`
  `;

  const result = await loader.pull("en", md);

  const placeholderRegex = /---LOCKED-PATTERN-[0-9a-f]+---/g;
  const placeholders = result.content.match(placeholderRegex) || [];
  expect(placeholders.length).toBe(1); // Only the !hover annotation is locked

  // Placeholder names embed a hash, so normalise them before comparing text.
  const sanitizedContent = result.content.replace(
    placeholderRegex,
    "---PLACEHOLDER---"
  );

  const expectedSanitized = dedent`
    \`\`\`js
    const x = 1;
    ---PLACEHOLDER---
    const pubkey = "vines1vzrYbzLMRdu58ou5XTby4qAqVRLmqo36NKPTg";
    \`\`\`
  `;

  expect(sanitizedContent).toBe(expectedSanitized);

  // Nothing was translated, so pushing must reproduce the input exactly.
  const pushed = await loader.push("es", result);

  expect(pushed).toBe(md);
});
|
|
||
it("should handle !! parameter headings", async () => {
  const lockedMarker = /---LOCKED-PATTERN-[0-9a-f]+---/g;

  const mdxLoader = createMdxLockedPatternsLoader();
  mdxLoader.setDefaultLocale("en");

  const source = dedent`
    # Parameters

    !! pubkey

    The public key of the account to query.

    !! encoding

    Encoding format for the returned Account data.
  `;

  // Pull: both `!!` headings must be swapped for placeholders.
  const pulled = await mdxLoader.pull("en", source);
  const markersFound = pulled.content.match(lockedMarker) || [];
  expect(markersFound.length).toBe(2); // Two patterns should be replaced

  // Placeholder names embed a hash, so normalise them before comparing text.
  const normalised = pulled.content.replace(lockedMarker, "---PLACEHOLDER---");
  expect(normalised).toBe(dedent`
    # Parameters

    ---PLACEHOLDER---

    The public key of the account to query.

    ---PLACEHOLDER---

    Encoding format for the returned Account data.
  `);

  // Simulate a translator: apply each English -> Spanish substitution in turn.
  const translations: Array<[string, string]> = [
    ["# Parameters", "# Parámetros"],
    [
      "The public key of the account to query.",
      "La clave pública de la cuenta a consultar.",
    ],
    [
      "Encoding format for the returned Account data.",
      "Formato de codificación para los datos de la cuenta devueltos.",
    ],
  ];
  const translatedContent = translations.reduce(
    (text, [english, spanish]) => text.replace(english, spanish),
    pulled.content
  );

  // Push: the headings must come back verbatim around the translated prose.
  const restored = await mdxLoader.push("es", {
    ...pulled,
    content: translatedContent,
  });

  expect(restored).toBe(dedent`
    # Parámetros

    !! pubkey

    La clave pública de la cuenta a consultar.

    !! encoding

    Formato de codificación para los datos de la cuenta devueltos.
  `);
});
|
|
||
it("should handle !type, !required, and !values declarations", async () => {
  const lockedMarker = /---LOCKED-PATTERN-[0-9a-f]+---/g;

  const mdxLoader = createMdxLockedPatternsLoader();
  mdxLoader.setDefaultLocale("en");

  const source = dedent`
    !! pubkey

    !type string
    !required

    The public key of the account to query.

    !! encoding

    !type string
    !values "base58" (default), "base64", "jsonParsed"

    Encoding format for the returned Account data.
  `;

  // Pull: two headings, two !type lines, one !required and one !values line.
  const pulled = await mdxLoader.pull("en", source);
  const markersFound = pulled.content.match(lockedMarker) || [];
  expect(markersFound.length).toBe(6); // Six patterns should be replaced

  // Placeholder names embed a hash, so normalise them before comparing text.
  const normalised = pulled.content.replace(lockedMarker, "---PLACEHOLDER---");
  expect(normalised).toBe(dedent`
    ---PLACEHOLDER---

    ---PLACEHOLDER---
    ---PLACEHOLDER---

    The public key of the account to query.

    ---PLACEHOLDER---

    ---PLACEHOLDER---
    ---PLACEHOLDER---

    Encoding format for the returned Account data.
  `);

  // Simulate a translator: apply each English -> Spanish substitution in turn.
  const translations: Array<[string, string]> = [
    [
      "The public key of the account to query.",
      "La clave pública de la cuenta a consultar.",
    ],
    [
      "Encoding format for the returned Account data.",
      "Formato de codificación para los datos de la cuenta devueltos.",
    ],
  ];
  const translatedContent = translations.reduce(
    (text, [english, spanish]) => text.replace(english, spanish),
    pulled.content
  );

  // Push: every declaration must come back verbatim around the translated prose.
  const restored = await mdxLoader.push("es", {
    ...pulled,
    content: translatedContent,
  });

  expect(restored).toBe(dedent`
    !! pubkey

    !type string
    !required

    La clave pública de la cuenta a consultar.

    !! encoding

    !type string
    !values "base58" (default), "base64", "jsonParsed"

    Formato de codificación para los datos de la cuenta devueltos.
  `);
});
| }); | ||
| }); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| import { ILoader } from "../_types"; | ||
| import { createLoader } from "../_utils"; | ||
| import { md5 } from "../../utils/md5"; | ||
| import _ from "lodash"; | ||
| import { I18nConfig } from "@lingo.dev/_spec"; | ||
|
|
||
/**
 * Replaces every span of `content` that matches one of `patterns` with an
 * opaque placeholder and records how to restore it.
 *
 * Each pattern string is compiled with the `g` and `m` flags. A placeholder has
 * the form `---LOCKED-PATTERN-<md5(matched text)>---`, so identical matches
 * share a single placeholder entry. Patterns are applied in order, each one to
 * the output of the previous one.
 *
 * @param content  Text to scan.
 * @param patterns Regular-expression sources. A pattern that fails to compile
 *                 is reported with `console.warn` and skipped.
 * @returns The rewritten text plus a map of placeholder -> original text.
 */
function extractLockedPatterns(
  content: string,
  patterns: string[] = []
): {
  content: string;
  lockedPlaceholders: Record<string, string>;
} {
  const lockedPlaceholders: Record<string, string> = {};
  let finalContent = content;

  // The default parameter only covers `undefined`; keep tolerating an explicit
  // null from untyped callers, as the original falsy check did.
  for (const patternStr of patterns ?? []) {
    // Guard only the compilation step so that unrelated failures are not
    // misreported as an invalid pattern.
    let pattern: RegExp;
    try {
      pattern = new RegExp(patternStr, "gm");
    } catch {
      console.warn(`Invalid regex pattern: ${patternStr}`);
      continue;
    }

    // Substitute through the regex so each replacement lands exactly where the
    // pattern matched. Replacing by matched *text* would hit the first textual
    // occurrence instead, which is the wrong place for anchored or look-around
    // patterns (e.g. `^!required$` when "!required" also appears mid-line).
    finalContent = finalContent.replace(pattern, (matchedText) => {
      // A zero-length match carries nothing to protect; leave the text alone
      // rather than injecting a placeholder for the empty string.
      if (matchedText === "") {
        return matchedText;
      }

      const placeholder = `---LOCKED-PATTERN-${md5(matchedText)}---`;
      lockedPlaceholders[placeholder] = matchedText;
      return placeholder;
    });
  }

  return {
    content: finalContent,
    lockedPlaceholders,
  };
}
|
|
||
/**
 * MDX text after locked spans have been swapped out, together with the data
 * needed to swap them back in.
 */
export interface MdxWithLockedPatterns {
  /** The document text, with each locked span replaced by its placeholder. */
  content: string;
  /** Maps each placeholder string to the original text it stands for. */
  lockedPlaceholders: Record<string, string>;
}
|
|
||
/**
 * Regex sources for MDX spans that must pass through translation unchanged.
 * Defined once at module level instead of being rebuilt on every `pull`.
 *
 * NOTE(review): the last entry looks redundant. The plain `// !hover` pattern
 * before it already matches any `// !hover(...)` line up to the end of that
 * line. It is kept so the pattern list stays exactly as it was.
 */
const MDX_LOCKED_PATTERNS: string[] = [
  "!params",
  "!! [\\w_]+",
  "!type [\\w<>\\[\\]\"',]+",
  "!required",
  "!values [\\s\\S]*?(?=\\n\\n|$)",
  "// !hover[\\s\\S]*?(?=\\n|$)",
  "// !hover\\([\\d:]+\\)[\\s\\S]*?(?=\\n|$)",
];

/**
 * Creates a loader that shields locked MDX patterns from translation.
 *
 * - `pull` replaces every span matching a locked pattern with a placeholder and
 *   returns the rewritten text together with the placeholder -> original map.
 * - `push` expands every placeholder in the (translated) text back to its
 *   original span and returns the resulting string.
 */
export default function createMdxLockedPatternsLoader(): ILoader<
  string,
  MdxWithLockedPatterns
> {
  return createLoader({
    async pull(locale, input) {
      // A missing input is treated as an empty document.
      const { content, lockedPlaceholders } = extractLockedPatterns(
        input || "",
        MDX_LOCKED_PATTERNS
      );

      return {
        content,
        lockedPlaceholders,
      };
    },

    async push(locale, data) {
      let result = data.content;

      // Tolerate a payload whose placeholder map is missing.
      // NOTE(review): the type says it is always present — confirm whether
      // upstream steps can drop it.
      const entries = Object.entries(data.lockedPlaceholders ?? {});

      for (const [placeholder, original] of entries) {
        // Use a replacer function so `original` is inserted verbatim. Passed as
        // a replacement string, sequences such as `$$`, `$&`, "$`" and `$'`
        // inside the locked text would be expanded and corrupt it.
        result = result.replaceAll(placeholder, () => original);
      }

      return result;
    },
  });
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.