Artemonim
diff --git a/‎CHANGELOG.md‎
Lines changed: 7 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎agent_docstrings/core.py‎
Lines changed: 25 additions & 9 deletions b/‎agent_docstrings/core.py‎
Lines changed: 25 additions & 9 deletions
diff --git a/‎agent_docstrings/languages/common.py‎
Lines changed: 39 additions & 14 deletions b/‎agent_docstrings/languages/common.py‎
Lines changed: 39 additions & 14 deletions
@@ -16,6 +16,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [NextRelease]
 
+### Fixed
+
+-   **Deterministic Processing**: Fixed a critical bug that caused line numbers in the table of contents to change on every run. This was due to inconsistent newline handling after removing an existing agent docstring. The process is now fully idempotent.
+-   **Robust Docstring Removal**: Improved the detection logic to correctly find and remove all agent-generated docstrings, even when located in the middle of a file or when multiple (erroneous) docstrings were present. This prevents docstring duplication on repeated runs.
+-   **Manual Docstring Preservation**: Ensured that manual docstrings are no longer reformatted or modified unless they are being merged with an agent-generated table of contents.
+-   **Version-Only Change Skipping**: Fixed a bug where files were being unnecessarily modified when only the version number in the auto-generated header differed, while the actual content structure remained unchanged. The tool now performs a normalized content comparison that ignores version differences, preventing unnecessary file modifications after Agent Docstrings version updates.
+
 ### Documentation
 
 -   **Contribution Guide**: Added a new `CONTRIBUTING.md` file with detailed guidelines for development workflow and the release process.
 
@@ -399,12 +399,16 @@ def process_file(path: Path, verbose: bool = False, beta: bool = False) -> None:
             return
 
         # ! Calculate the correct line offset for the final positions
-        # * First create a temporary header to count its lines
+        # * To ensure deterministic line numbers, we need to calculate the offset
+        # * based on the final file structure, not the intermediate state
+        
+        # * First, determine how many lines will be in the final header
         temp_header = _format_header(classes, functions, language, 0)
-        temp_header_lines = temp_header.splitlines()
+        temp_header_line_count = len(temp_header.splitlines())
 
-        # * Calculate offset: preserved header lines + generated header lines
-        line_offset = header_end_line + len(temp_header_lines)
+        # * Calculate total offset: preserved header lines + generated header lines
+        # * This represents where the cleaned body will start in the final file
+        line_offset = header_end_line + temp_header_line_count
 
         # ! Language-specific adjustments for line numbering
         if language == "go":
@@ -462,9 +466,9 @@ def process_file(path: Path, verbose: bool = False, beta: bool = False) -> None:
 
                     if end_idx is not None:
                         # Compute auto header content lines with correct offset for merge
-                        # temp_header_lines holds the auto header lines including delimiters
-                        # content_lines length is temp_header_lines minus start/end markers
-                        offset_override = len(temp_header_lines) - 2
+                        # temp_header_line_count holds the auto header line count including delimiters
+                        # content_lines length is temp_header_line_count minus start/end markers
+                        offset_override = temp_header_line_count - 2
                         # Generate only the header content lines (without triple-quote delimiters)
                         header_inner = _get_header_content_lines(
                             classes, functions, language, offset_override
@@ -498,8 +502,20 @@ def process_file(path: Path, verbose: bool = False, beta: bool = False) -> None:
             # Use single newlines to test composition theory
             new_content = "\n".join(filter(None, new_content_parts))
 
-        # Only write changes if content changed
-        if new_content != original_content:
+        def normalize_version(text: str) -> str:
+            """Return ``text`` with the version after "Agent Docstrings v" in the generated header line replaced by ``[VERSION]``."""
+            return re.sub(
+                r"(Table of content is automatically generated by Agent Docstrings v)[\d\.]+\w*",
+                r"\1[VERSION]",
+                text,
+            )
+
+        # To avoid rewriting files just for a version bump, we compare the content
+        # with the version number normalized.
+        normalized_original = normalize_version(original_content)
+        normalized_new = normalize_version(new_content)
+
+        if normalized_original != normalized_new:
             path.write_text(new_content, encoding="utf-8")
             if verbose:
                 print(f"Processed {language.capitalize()}: {path}")
 
@@ -1,12 +1,12 @@
 """
     --- AUTO-GENERATED DOCSTRING ---
-    Table of content is automatically generated by Agent Docstrings v1.3.2
+    Table of content is automatically generated by Agent Docstrings v1.3.3
     
     Classes/Functions:
-        - SignatureInfo (line 17):
-        - ClassInfo (line 21):
-        - CommentStyle (line 27):
-        - remove_agent_docstring(text: str, language: str) -> str (line 46)
+        - SignatureInfo (line 19):
+        - ClassInfo (line 24):
+        - CommentStyle (line 31):
+        - remove_agent_docstring(text: str, language: str) -> str (line 52)
     --- END AUTO-GENERATED DOCSTRING ---
 """
 from __future__ import annotations
@@ -57,22 +57,47 @@ def remove_agent_docstring(text: str, language: str) -> str:
     if language == "python":
         def replacer(match):
             docstring_content = match.group(0)
+
+            # If the docstring doesn't contain the agent marker, it's a manual docstring.
+            # Leave it untouched.
+            if DOCSTRING_START_MARKER not in docstring_content:
+                return docstring_content
+
+            # * Match the auto-generated block inside the docstring, including any leading/
+            # * trailing whitespace and the trailing newline (if present). Use single
+            # * backslashes so that ``\s`` is interpreted by the *regex* engine as a
+            # * whitespace token instead of a literal backslash followed by ``s``.
             auto_content_pattern = re.compile(
-                rf'\s*{start_marker_escaped}[\s\S]*?{end_marker_escaped}\s*?\n?',
-                re.DOTALL
+                rf"\s*{start_marker_escaped}[\s\S]*?{end_marker_escaped}\s*\n?",
+                re.DOTALL,
             )
             cleaned_docstring = auto_content_pattern.sub('', docstring_content)
+            
+            # Check what's left after removing the agent part
             temp_cleaned = cleaned_docstring.replace('"""', '').replace("'''", '').strip()
+            
             if not temp_cleaned:
-                return ''  # Remove empty docstring
-            # Ensure single newline padding for non-empty manual comments
-            return f'"""\n{temp_cleaned}\n"""'
-        docstring_pattern = re.compile(r'^\s*("""[\s\S]*?"""|'r"'''[\s\S]*?''')")
+                return ''  # Docstring was purely agent-generated, so remove it.
+
+            # A manual part remains: re-wrap it using real newlines (a doubled backslash would emit a literal backslash-n).
+            return f'"""\n{temp_cleaned}\n"""'
+
+        # * Match ANY triple-quoted block (single or double quotes) anywhere in the text.
+        # * The former pattern anchored at ``^`` missed auto-generated blocks that were
+        # * not located at the very start of the file, leading to duplication issues.
+        docstring_pattern = re.compile(
+            r'("""[\s\S]*?"""|\'\'\'[\s\S]*?\'\'\')',
+            re.DOTALL,
+        )
         # Iteratively clean the text
         cleaned_text = docstring_pattern.sub(replacer, text)
-        cleaned_text = docstring_pattern.sub(replacer, cleaned_text) # Run again to handle adjacent blocks
-        # Collapse whitespace and return
-        return cleaned_text.strip()
+        # * Run a second pass to handle cases where two docstrings appear back-to-back,
+        # * which can happen after removing an intermediary block.
+        cleaned_text = docstring_pattern.sub(replacer, cleaned_text)
+        # * Strip leading newlines that may be left after docstring removal
+        # * to ensure consistent line numbering between runs
+        cleaned_text = cleaned_text.lstrip('\n')
+        return cleaned_text
     else:
         # For C-style comments, be more flexible with the format
         # Handle both compact (/**---...---*/) and expanded formats