Skip to content

Commit 40a7294

Browse files
committed
Cleanup of misra_help automation
1 parent 21b052e commit 40a7294

2 files changed

Lines changed: 40 additions & 14 deletions

File tree

scripts/generate_rules/misra_help/extract_rules.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -530,8 +530,6 @@ def extract_rules(pdf_path: Path, standard: str, cache_dir: Path) -> list[Rule]:
530530
# transform — no parsing or formatting — and keeps the output readable.
531531

532532
_CODE_FORMAT_STEPS = [
533-
# Pull "// ..." comments onto their own line.
534-
(re.compile(r"\s+//"), "\n//"),
535533
# Newline after `;` (but not inside `for( ; ; )` — the next rule catches
536534
# runs of `;` we should leave alone).
537535
(re.compile(r";\s+(?=\S)"), ";\n"),
@@ -542,17 +540,44 @@ def extract_rules(pdf_path: Path, standard: str, cache_dir: Path) -> list[Rule]:
542540
]
543541

544542

543+
def _indent_by_braces(text: str) -> str:
544+
"""Add 2-space indentation based on brace nesting depth."""
545+
lines = text.splitlines()
546+
out: list[str] = []
547+
depth = 0
548+
for line in lines:
549+
stripped = line.strip()
550+
if not stripped:
551+
out.append("")
552+
continue
553+
# Dedent for lines that start with `}`
554+
if stripped.startswith("}"):
555+
depth = max(0, depth - 1)
556+
out.append(" " * depth + stripped)
557+
# Indent after lines that end with `{`
558+
if stripped.endswith("{"):
559+
depth += 1
560+
return "\n".join(out)
561+
562+
545563
def _format_code_lines(text: str) -> str:
    """Heuristically re-flow a C/C++ code example into separate lines.

    Intended for examples that arrive concatenated onto a single line.
    The transformation is purely textual and deterministic:

    1. runs of three or more spaces/tabs are collapsed to a single space
       (two-space runs are left alone, as they may be intentional
       alignment), and the result is stripped;
    2. each ``(pattern, replacement)`` pair in ``_CODE_FORMAT_STEPS`` is
       applied in order;
    3. trailing whitespace is removed from every line;
    4. the lines are re-indented by brace depth via ``_indent_by_braces``.
    """
    formatted = re.sub(r"[ \t]{3,}", " ", text).strip()
    for pattern, replacement in _CODE_FORMAT_STEPS:
        formatted = pattern.sub(replacement, formatted)
    # Drop trailing whitespace per line before handing over to the indenter.
    trimmed = [piece.rstrip() for piece in formatted.splitlines()]
    return _indent_by_braces("\n".join(trimmed).strip())
556581

557582

558583
# ----------------------------------------------------------------------------

scripts/generate_rules/misra_help/rewrite_help.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -269,11 +269,6 @@ def system_prompt() -> str:
269269
" (```c or ```cpp).",
270270
" - Numbered exceptions must use \"1.\", \"2.\", \"3.\""
271271
" format, never bullets.",
272-
" - In code blocks, each statement should be on its own"
273-
" line.",
274-
" - Compliance comments (/* Compliant */,"
275-
" /* Non-compliant */) must stay on the same line as"
276-
" the statement they annotate.",
277272
"",
278273
"4. Heading title: the \"# <Rule|Dir> X.Y[.Z]: <title>\""
279274
" heading must use the title from the .ql @name metadata"
@@ -308,8 +303,14 @@ def system_prompt() -> str:
308303
"- Do NOT remove content that is present in the input"
309304
" (unless it is a PDF artifact listed above).",
310305
"- Do NOT change technical meaning, even subtly.",
311-
"- Do NOT modify identifiers, variable names, or code"
312-
" (except whitespace formatting in code blocks).",
306+
"- Do NOT modify code inside fenced code blocks."
307+
" Preserve indentation, brace placement, comment"
308+
" positions, and alignment exactly as given.",
309+
"- Do NOT change brace placement style (e.g. Allman to"
310+
" K&R or vice versa).",
311+
"- Do NOT merge separate fenced code blocks into one or"
312+
" convert prose paragraphs between code blocks into"
313+
" code comments.",
313314
"- Do NOT wrap the entire output in a fenced code block.",
314315
"",
315316
"Output ONLY the corrected Markdown file content."

0 commit comments

Comments
 (0)