Skip to content

Commit 6a54271

Browse files
committed
feat: implement LLM output sanitization, refine Diataxis prompts with explicit formatting rules, and update GitHub docs path to content/docs while escaping frontmatter quotes.
1 parent 9f4d4bb commit 6a54271

7 files changed

Lines changed: 245 additions & 75 deletions

File tree

apps/web/app/api/projects/route.ts

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -34,13 +34,15 @@ export async function POST(req: NextRequest) {
3434
// Use the access token (ghu_*) to authenticate
3535
const octokit = new Octokit({ auth: tokenResult.accessToken });
3636

37-
// Create Repo in user's account
38-
const repoName = sanitizeRepoName(name);
39-
const { data: createdRepo } = await octokit.request("POST /user/repos", {
37+
// Create Repo from template in user's account
38+
const repoName = `${sanitizeRepoName(name)}-docs`;
39+
const { data: createdRepo } = await octokit.request("POST /repos/{template_owner}/{template_repo}/generate", {
40+
template_owner: "SecrinLabs",
41+
template_repo: "secrin-docs-template",
4042
name: repoName,
41-
description: description || `Project: ${name}`,
43+
description: description || `Documentation for ${name} - powered by Secrin`,
4244
private: true,
43-
auto_init: true,
45+
include_all_branches: false,
4446
headers: {
4547
"X-GitHub-Api-Version": "2022-11-28",
4648
},

packages/arc42gen/diataxis/generator.py

Lines changed: 47 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from ..providers.base import BaseLLMProvider
1313
from ..providers.factory import create_llm_provider
1414
from ..templates import load_prompt
15+
from ..utils.sanitizer import sanitize_llm_content, sanitize_list_item, is_placeholder_content
1516
from .models import (
1617
Tutorial,
1718
TutorialStep,
@@ -84,15 +85,15 @@ def _build_tutorial_prompt(self, analysis: AnalysisResult) -> str:
8485
)
8586

8687
def _parse_tutorial_response(self, response: str, default_title: str) -> Tutorial:
87-
goal = self._extract_section(response, "GOAL") or "Complete the tutorial"
88-
time = self._extract_section(response, "TIME") or ""
88+
goal = sanitize_llm_content(self._extract_section(response, "GOAL") or "Complete the tutorial")
89+
time = sanitize_llm_content(self._extract_section(response, "TIME") or "")
8990

9091
prerequisites = []
9192
prereq_text = self._extract_section(response, "PREREQUISITES")
9293
if prereq_text:
9394
for line in prereq_text.split('\n'):
94-
line = line.strip().lstrip('-').strip()
95-
if line:
95+
line = sanitize_list_item(line.strip().lstrip('-').strip())
96+
if line and not is_placeholder_content(line):
9697
prerequisites.append(line)
9798

9899
steps = []
@@ -106,21 +107,21 @@ def _parse_tutorial_response(self, response: str, default_title: str) -> Tutoria
106107

107108
title_match = re.search(r"TITLE:\s*(.+?)(?=\n|INSTRUCTIONS)", step_text)
108109
if title_match:
109-
title = title_match.group(1).strip()
110+
title = sanitize_llm_content(title_match.group(1).strip())
110111

111112
instr_match = re.search(r"INSTRUCTIONS:\s*(.+?)(?=\nCODE|CHECKPOINT|$)", step_text, re.DOTALL)
112113
if instr_match:
113-
instructions = instr_match.group(1).strip()
114+
instructions = sanitize_llm_content(instr_match.group(1).strip())
114115

115116
code_match = re.search(r"CODE:\s*(.+?)(?=\nCHECKPOINT|$)", step_text, re.DOTALL)
116117
if code_match:
117-
code = code_match.group(1).strip()
118+
code = code_match.group(1).strip() # Don't sanitize code
118119

119120
check_match = re.search(r"CHECKPOINT:\s*(.+?)$", step_text, re.DOTALL)
120121
if check_match:
121-
checkpoint = check_match.group(1).strip()
122+
checkpoint = sanitize_llm_content(check_match.group(1).strip())
122123

123-
if title:
124+
if title and not is_placeholder_content(title):
124125
steps.append(TutorialStep(
125126
title=title,
126127
instructions=instructions,
@@ -132,8 +133,8 @@ def _parse_tutorial_response(self, response: str, default_title: str) -> Tutoria
132133
next_text = self._extract_section(response, "NEXT_STEPS")
133134
if next_text:
134135
for line in next_text.split('\n'):
135-
line = line.strip().lstrip('-').strip()
136-
if line:
136+
line = sanitize_list_item(line.strip().lstrip('-').strip())
137+
if line and not is_placeholder_content(line):
137138
next_steps.append(line)
138139

139140
return Tutorial(
@@ -177,34 +178,55 @@ def _build_troubleshooting_prompt(self, analysis: AnalysisResult) -> str:
177178
)
178179

179180
def _parse_howto_response(self, response: str, default_title: str) -> HowToGuide:
180-
problem = self._extract_section(response, "PROBLEM") or ""
181-
time = self._extract_section(response, "TIME") or ""
181+
problem = sanitize_llm_content(self._extract_section(response, "PROBLEM") or "")
182+
time = sanitize_llm_content(self._extract_section(response, "TIME") or "")
182183

183184
prerequisites = []
184185
prereq_text = self._extract_section(response, "PREREQUISITES")
185186
if prereq_text:
186187
for line in prereq_text.split('\n'):
187-
line = line.strip().lstrip('-').strip()
188-
if line:
188+
line = sanitize_list_item(line.strip().lstrip('-').strip())
189+
if line and not is_placeholder_content(line):
189190
prerequisites.append(line)
190191

191192
steps = []
192193
steps_text = self._extract_section(response, "STEPS")
193194
if steps_text:
194195
for line in steps_text.split('\n'):
195-
line = line.strip().lstrip('-').lstrip('0123456789.').strip()
196-
if line:
196+
line = sanitize_list_item(line.strip().lstrip('-').lstrip('0123456789.').strip())
197+
if line and not is_placeholder_content(line):
197198
steps.append(line)
198199

199200
troubleshooting = []
200-
for i in range(1, 10):
201-
prob = self._extract_section(response, f"PROBLEM_{i}")
202-
sol = self._extract_section(response, f"SOLUTION_{i}")
203-
if prob and sol:
204-
troubleshooting.append(TroubleshootingItem(
205-
problem=prob,
206-
solution=sol,
207-
))
201+
# Parse new ISSUE/FIX format
202+
troubleshooting_text = self._extract_section(response, "TROUBLESHOOTING")
203+
if troubleshooting_text:
204+
issue_pattern = r'ISSUE:\s*(.+?)(?=\nFIX:|$)'
205+
fix_pattern = r'FIX:\s*(.+?)(?=\nISSUE:|$)'
206+
issues = re.findall(issue_pattern, troubleshooting_text, re.DOTALL)
207+
fixes = re.findall(fix_pattern, troubleshooting_text, re.DOTALL)
208+
for issue, fix in zip(issues, fixes):
209+
issue_clean = sanitize_llm_content(issue)
210+
fix_clean = sanitize_llm_content(fix)
211+
if issue_clean and fix_clean and not is_placeholder_content(issue_clean):
212+
troubleshooting.append(TroubleshootingItem(
213+
problem=issue_clean,
214+
solution=fix_clean,
215+
))
216+
217+
# Fallback: try old PROBLEM_N/SOLUTION_N format for backward compatibility
218+
if not troubleshooting:
219+
for i in range(1, 10):
220+
prob = self._extract_section(response, f"PROBLEM_{i}")
221+
sol = self._extract_section(response, f"SOLUTION_{i}")
222+
if prob and sol:
223+
prob_clean = sanitize_llm_content(prob)
224+
sol_clean = sanitize_llm_content(sol)
225+
if prob_clean and sol_clean and not is_placeholder_content(prob_clean):
226+
troubleshooting.append(TroubleshootingItem(
227+
problem=prob_clean,
228+
solution=sol_clean,
229+
))
208230

209231
return HowToGuide(
210232
title=default_title,

packages/arc42gen/jobs.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,15 @@ def _update_progress(step: int, total: int, message: str, status: str = "running
4545

4646
def add_mdx_frontmatter(content: str, title: str, description: str = "") -> str:
4747
"""Add MDX frontmatter for Fumadocs compatibility."""
48-
frontmatter = f"""---
49-
title: {title}
50-
description: {description or title}
48+
# Escape double quotes in title and description for YAML safety
49+
safe_title = title.replace('"', '\\"')
50+
safe_description = (description or title).replace('"', '\\"')
51+
frontmatter = f'''---
52+
title: "{safe_title}"
53+
description: "{safe_description}"
5154
---
5255
53-
"""
56+
'''
5457
return frontmatter + content
5558

5659

@@ -72,13 +75,13 @@ def _commit_to_github(
7275
ref = repo.get_git_ref(f"heads/{branch}")
7376
latest_commit = repo.get_git_commit(ref.object.sha)
7477

75-
# Create tree elements for all files (under docs/ folder)
78+
# Create tree elements for all files (under content/docs/ folder)
7679
tree_elements = []
7780
for path, content in files.items():
7881
blob = repo.create_git_blob(content, "utf-8")
7982
tree_elements.append(
8083
InputGitTreeElement(
81-
path=f"docs/{path}",
84+
path=f"content/docs/{path}",
8285
mode="100644",
8386
type="blob",
8487
sha=blob.sha,

packages/arc42gen/templates/prompts/diataxis/howto.txt

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,32 @@ Create a How-To guide for "{topic}" for this codebase:
55
REPOSITORY: {repo_name}
66
LANGUAGE: {language}
77

8+
IMPORTANT FORMATTING RULES:
9+
- Write final, complete content - NOT placeholders or templates
10+
- Do NOT use brackets like [description] or [step here]
11+
- Do NOT include template markers in your output
12+
- Write clear, actionable text that can be used directly
13+
814
Generate a how-to guide with EXACTLY this format:
915

1016
PROBLEM:
11-
[What problem does this guide solve]
17+
A clear description of what problem this guide helps solve
1218

1319
PREREQUISITES:
14-
- [Prerequisite 1]
15-
- [Prerequisite 2]
20+
- First prerequisite needed
21+
- Second prerequisite needed
1622

1723
TIME:
18-
[Estimated time]
24+
Estimated time to complete
1925

2026
STEPS:
21-
- [Step 1]
22-
- [Step 2]
23-
- [Step 3]
27+
- First step with clear instructions
28+
- Second step with clear instructions
29+
- Third step with clear instructions
2430

2531
TROUBLESHOOTING:
26-
PROBLEM_1: [Common problem]
27-
SOLUTION_1: [How to fix it]
32+
ISSUE: Description of a common problem developers might face
33+
FIX: How to resolve this issue
2834

29-
PROBLEM_2: [Another common problem]
30-
SOLUTION_2: [How to fix it]
35+
ISSUE: Description of another common problem
36+
FIX: How to resolve this issue

packages/arc42gen/templates/prompts/diataxis/troubleshooting.txt

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ Create a Troubleshooting guide for this codebase:
55
REPOSITORY: {repo_name}
66
LANGUAGE: {language}
77

8+
IMPORTANT FORMATTING RULES:
9+
- Write final, complete content - NOT placeholders or templates
10+
- Do NOT use brackets like [description] or [step here]
11+
- Do NOT include template markers in your output
12+
- Write clear, actionable text that can be used directly
13+
814
Generate a troubleshooting guide with EXACTLY this format:
915

1016
PROBLEM:
@@ -15,18 +21,18 @@ PREREQUISITES:
1521
- Project set up locally
1622

1723
STEPS:
18-
- Identify the error message
19-
- Check the troubleshooting section below
20-
- Follow the solution steps
24+
- Identify the error message you are seeing
25+
- Find the matching issue in the troubleshooting section below
26+
- Follow the solution steps provided
2127

2228
TROUBLESHOOTING:
23-
PROBLEM_1: [Common issue like "Dependencies fail to install"]
24-
SOLUTION_1: [How to fix it]
29+
ISSUE: Dependencies fail to install or version conflicts occur
30+
FIX: Delete node_modules or virtual environment, clear cache, and reinstall dependencies
2531

26-
PROBLEM_2: [Common issue like "Tests fail"]
27-
SOLUTION_2: [How to fix it]
32+
ISSUE: Tests fail unexpectedly on a fresh clone
33+
FIX: Ensure all environment variables are set and database migrations are run
2834

29-
PROBLEM_3: [Common issue]
30-
SOLUTION_3: [How to fix it]
35+
ISSUE: Build or compilation errors after pulling latest changes
36+
FIX: Check for breaking changes in dependencies and update configuration if needed
3137

32-
List 3-5 common problems developers might face.
38+
List 3-5 common problems developers might face based on the codebase.

packages/arc42gen/templates/prompts/diataxis/tutorial.txt

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,38 +7,45 @@ LANGUAGE: {language}
77
MODULES: {modules}
88
LOC: {total_loc}
99

10+
IMPORTANT FORMATTING RULES:
11+
- Write final, complete content - NOT placeholders or templates
12+
- Do NOT use brackets like [description] or [step here]
13+
- Do NOT include template markers in your output
14+
- Write clear, actionable text that can be used directly
15+
1016
Generate a tutorial with EXACTLY this format:
1117

1218
GOAL:
13-
[What the developer will accomplish after completing this tutorial]
19+
What the developer will accomplish after completing this tutorial
1420

1521
PREREQUISITES:
16-
- [Prerequisite 1]
17-
- [Prerequisite 2]
22+
- First prerequisite (e.g., Python 3.10+ installed)
23+
- Second prerequisite (e.g., Git installed)
1824

1925
TIME:
20-
[Estimated time, e.g., "15 minutes"]
26+
15 minutes
2127

2228
STEP_1:
23-
TITLE: [Step title]
24-
INSTRUCTIONS: [What to do in this step]
25-
CODE: [Shell commands if any]
26-
CHECKPOINT: [How to verify this step worked]
29+
TITLE: Clone the Repository
30+
INSTRUCTIONS: Clone the project repository to your local machine using git
31+
CODE: git clone https://github.com/example/repo.git && cd repo
32+
CHECKPOINT: You should see the project files in your current directory
2733

2834
STEP_2:
29-
TITLE: [Step title]
30-
INSTRUCTIONS: [What to do]
31-
CODE: [Commands]
32-
CHECKPOINT: [Verification]
35+
TITLE: Install Dependencies
36+
INSTRUCTIONS: Install the required dependencies for the project
37+
CODE: pip install -r requirements.txt
38+
CHECKPOINT: Installation completes without errors
3339

3440
STEP_3:
35-
TITLE: [Step title]
36-
INSTRUCTIONS: [What to do]
37-
CODE: [Commands]
38-
CHECKPOINT: [Verification]
41+
TITLE: Run the Application
42+
INSTRUCTIONS: Start the application to verify everything works
43+
CODE: python main.py
44+
CHECKPOINT: The application starts and displays output
3945

4046
NEXT_STEPS:
41-
- [What to explore next]
42-
- [Related documentation]
47+
- Explore the API documentation
48+
- Read the architecture overview
49+
- Try modifying a simple feature
4350

4451
Create 3-5 clear steps for getting the project running locally.

0 commit comments

Comments
 (0)