Skip to content

Commit 1a3ffc8

Browse files
authored
Merge branch 'dev' into cicd-enchance
2 parents 35aa9cc + b7380f0 commit 1a3ffc8

8 files changed

Lines changed: 605 additions & 449 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1616

1717
## [NextRelease]
1818

19+
### Fixed
20+
21+
- **Deterministic Processing**: Fixed a critical bug that caused line numbers in the table of contents to change on every run. This was due to inconsistent newline handling after removing an existing agent docstring. The process is now fully idempotent.
22+
- **Robust Docstring Removal**: Improved the detection logic to correctly find and remove all agent-generated docstrings, even when they are located in the middle of a file or when multiple (erroneous) docstrings are present. This prevents docstring duplication on repeated runs.
23+
- **Manual Docstring Preservation**: Ensured that manual docstrings are no longer reformatted or modified unless they are being merged with an agent-generated table of contents.
24+
- **Version-Only Change Skipping**: Fixed a bug where files were being unnecessarily modified when only the version number in the auto-generated header differed, while the actual content structure remained unchanged. The tool now performs normalized content comparison that ignores version differences, preventing unnecessary file modifications after Agent Docstrings version updates.
25+
1926
### Documentation
2027

2128
- **Contribution Guide**: Added a new `CONTRIBUTING.md` file with detailed guidelines for development workflow and the release process.

agent_docstrings/core.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -399,12 +399,16 @@ def process_file(path: Path, verbose: bool = False, beta: bool = False) -> None:
399399
return
400400

401401
# ! Calculate the correct line offset for the final positions
402-
# * First create a temporary header to count its lines
402+
# * To ensure deterministic line numbers, we need to calculate the offset
403+
# * based on the final file structure, not the intermediate state
404+
405+
# * First, determine how many lines will be in the final header
403406
temp_header = _format_header(classes, functions, language, 0)
404-
temp_header_lines = temp_header.splitlines()
407+
temp_header_line_count = len(temp_header.splitlines())
405408

406-
# * Calculate offset: preserved header lines + generated header lines
407-
line_offset = header_end_line + len(temp_header_lines)
409+
# * Calculate total offset: preserved header lines + generated header lines
410+
# * This represents where the cleaned body will start in the final file
411+
line_offset = header_end_line + temp_header_line_count
408412

409413
# ! Language-specific adjustments for line numbering
410414
if language == "go":
@@ -462,9 +466,9 @@ def process_file(path: Path, verbose: bool = False, beta: bool = False) -> None:
462466

463467
if end_idx is not None:
464468
# Compute auto header content lines with correct offset for merge
465-
# temp_header_lines holds the auto header lines including delimiters
466-
# content_lines length is temp_header_lines minus start/end markers
467-
offset_override = len(temp_header_lines) - 2
469+
# temp_header_line_count holds the auto header line count including delimiters
470+
# content_lines length is temp_header_line_count minus start/end markers
471+
offset_override = temp_header_line_count - 2
468472
# Generate only the header content lines (without triple-quote delimiters)
469473
header_inner = _get_header_content_lines(
470474
classes, functions, language, offset_override
@@ -498,8 +502,20 @@ def process_file(path: Path, verbose: bool = False, beta: bool = False) -> None:
498502
# Use single newlines to test composition theory
499503
new_content = "\n".join(filter(None, new_content_parts))
500504

501-
# Only write changes if content changed
502-
if new_content != original_content:
505+
def normalize_version(text: str) -> str:
506+
"""Replaces the version string in a docstring with a placeholder."""
507+
return re.sub(
508+
r"(Table of content is automatically generated by Agent Docstrings v)[\d\.]+\w*",
509+
r"\1[VERSION]",
510+
text,
511+
)
512+
513+
# To avoid rewriting files just for a version bump, we compare the content
514+
# with the version number normalized.
515+
normalized_original = normalize_version(original_content)
516+
normalized_new = normalize_version(new_content)
517+
518+
if normalized_original != normalized_new:
503519
path.write_text(new_content, encoding="utf-8")
504520
if verbose:
505521
print(f"Processed {language.capitalize()}: {path}")

agent_docstrings/languages/common.py

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
"""
22
--- AUTO-GENERATED DOCSTRING ---
3-
Table of content is automatically generated by Agent Docstrings v1.3.2
3+
Table of content is automatically generated by Agent Docstrings v1.3.3
44
55
Classes/Functions:
6-
- SignatureInfo (line 17):
7-
- ClassInfo (line 21):
8-
- CommentStyle (line 27):
9-
- remove_agent_docstring(text: str, language: str) -> str (line 46)
6+
- SignatureInfo (line 19):
7+
- ClassInfo (line 24):
8+
- CommentStyle (line 31):
9+
- remove_agent_docstring(text: str, language: str) -> str (line 52)
1010
--- END AUTO-GENERATED DOCSTRING ---
1111
"""
1212
from __future__ import annotations
@@ -57,22 +57,47 @@ def remove_agent_docstring(text: str, language: str) -> str:
5757
if language == "python":
5858
def replacer(match):
5959
docstring_content = match.group(0)
60+
61+
# If the docstring doesn't contain the agent marker, it's a manual docstring.
62+
# Leave it untouched.
63+
if DOCSTRING_START_MARKER not in docstring_content:
64+
return docstring_content
65+
66+
# * Match the auto-generated block inside the docstring, including any leading/
67+
# * trailing whitespace and the trailing newline (if present). Use single
68+
# * backslashes so that ``\s`` is interpreted by the *regex* engine as a
69+
# * whitespace token instead of a literal backslash followed by ``s``.
6070
auto_content_pattern = re.compile(
61-
rf'\s*{start_marker_escaped}[\s\S]*?{end_marker_escaped}\s*?\n?',
62-
re.DOTALL
71+
rf"\s*{start_marker_escaped}[\s\S]*?{end_marker_escaped}\s*\n?",
72+
re.DOTALL,
6373
)
6474
cleaned_docstring = auto_content_pattern.sub('', docstring_content)
75+
76+
# Check what's left after removing the agent part
6577
temp_cleaned = cleaned_docstring.replace('"""', '').replace("'''", '').strip()
78+
6679
if not temp_cleaned:
67-
return '' # Remove empty docstring
68-
# Ensure single newline padding for non-empty manual comments
69-
return f'"""\n{temp_cleaned}\n"""'
70-
docstring_pattern = re.compile(r'^\s*("""[\s\S]*?"""|'r"'''[\s\S]*?''')")
80+
return '' # Docstring was purely agent-generated, so remove it.
81+
82+
# There was a manual part. Reformat it cleanly.
83+
return f'"""\\n{temp_cleaned}\\n"""'
84+
85+
# * Match ANY triple-quoted block (single or double quotes) anywhere in the text.
86+
# * The former pattern anchored at ``^`` missed auto-generated blocks that were
87+
# * not located at the very start of the file, leading to duplication issues.
88+
docstring_pattern = re.compile(
89+
r'("""[\s\S]*?"""|\'\'\'[\s\S]*?\'\'\')',
90+
re.DOTALL,
91+
)
7192
# Iteratively clean the text
7293
cleaned_text = docstring_pattern.sub(replacer, text)
73-
cleaned_text = docstring_pattern.sub(replacer, cleaned_text) # Run again to handle adjacent blocks
74-
# Collapse whitespace and return
75-
return cleaned_text.strip()
94+
# * Run a second pass to handle cases where two docstrings appear back-to-back,
95+
# * which can happen after removing an intermediary block.
96+
cleaned_text = docstring_pattern.sub(replacer, cleaned_text)
97+
# * Remove leading newlines that may be left after docstring removal
98+
# * to ensure consistent line numbering between runs
99+
cleaned_text = cleaned_text.lstrip('\n')
100+
return cleaned_text
76101
else:
77102
# For C-style comments, be more flexible with the format
78103
# Handle both compact (/**---...---*/) and expanded formats

0 commit comments

Comments
 (0)