Skip to content

Commit 69994dd

Browse files
committed
use dedicated database for advisories
1 parent 7eda8cf commit 69994dd

7 files changed

Lines changed: 342 additions & 13 deletions

File tree

src/seclab_taskflows/mcp_servers/ghsa.py

Lines changed: 281 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,12 @@
66
import json
77
from urllib.parse import urlparse, parse_qs
88
from .gh_code_scanning import call_api
9-
from seclab_taskflow_agent.path_utils import log_file_name
9+
from seclab_taskflow_agent.path_utils import mcp_data_dir, log_file_name
10+
from .ghsa_models import GHSA, GHSASummary, Base
11+
from pathlib import Path
12+
from sqlalchemy import create_engine
13+
from sqlalchemy.orm import Session
14+
from .utils import process_repo
1015

1116
logging.basicConfig(
1217
level=logging.DEBUG,
@@ -17,6 +22,140 @@
1722

1823
mcp = FastMCP("GitHubRepoAdvisories")
1924

25+
# Directory handed to GHSABackend, which places ghsa.db inside it.
# NOTE(review): presumably overridable through a GHSA_DIR environment
# variable via mcp_data_dir() -- confirm against path_utils.
MEMORY = mcp_data_dir("seclab-taskflows", "ghsa", "GHSA_DIR")
26+
27+
28+
def ghsa_to_dict(result):
    """Convert a GHSA record into a plain dict.

    Every column is copied unchanged except ``repo``, which is lower-cased.
    """
    columns = (
        "id",
        "ghsa_id",
        "repo",
        "severity",
        "cve_id",
        "description",
        "summary",
        "published_at",
        "state",
    )
    record = {column: getattr(result, column) for column in columns}
    record["repo"] = record["repo"].lower()
    return record
40+
41+
42+
def ghsa_summary_to_dict(summary):
    """Convert a GHSASummary record into a plain dict.

    Every column is copied unchanged except ``repo``, which is lower-cased.
    """
    columns = (
        "id",
        "repo",
        "total_advisories",
        "high_severity_count",
        "medium_severity_count",
        "low_severity_count",
        "summary_notes",
    )
    record = {column: getattr(summary, column) for column in columns}
    record["repo"] = record["repo"].lower()
    return record
52+
53+
class GHSABackend:
    """SQLite-backed store for GHSA advisories and per-repository summaries.

    Each method opens its own short-lived SQLAlchemy session on
    ``self.engine`` and commits before returning.
    """

    def __init__(self, memcache_state_dir: str):
        """Create the engine and make sure both tables exist.

        Args:
            memcache_state_dir: Directory expected to hold ``ghsa.db``. When
                the path does not exist, an in-memory SQLite database is used
                instead, so nothing is persisted.
        """
        self.memcache_state_dir = memcache_state_dir
        # Not referenced by any method of this class; kept so the set of
        # instance attributes is unchanged.
        self.location_pattern = r"^([a-zA-Z]+)(:\d+){4}$"
        if not Path(self.memcache_state_dir).exists():
            # Make the loss of persistence visible instead of silent.
            logging.warning(
                "GHSA data directory %s does not exist; using an in-memory database",
                self.memcache_state_dir,
            )
            db_dir = "sqlite://"
        else:
            db_dir = f"sqlite:///{self.memcache_state_dir}/ghsa.db"
        self.engine = create_engine(db_dir, echo=False)
        Base.metadata.create_all(
            self.engine,
            tables=[
                GHSA.__table__,
                GHSASummary.__table__,
            ],
        )

    def store_new_ghsa(self, repo, ghsa_id, severity, cve_id, description, summary, published_at, state):
        """Insert an advisory, or update the one matching ``repo`` and ``ghsa_id``.

        On update, only arguments with a truthy value overwrite the stored
        column, so empty values never erase existing data.

        Returns:
            A confirmation message naming the advisory and the repository.
        """
        with Session(self.engine) as session:
            existing = session.query(GHSA).filter_by(repo=repo, ghsa_id=ghsa_id).first()
            if existing:
                updates = {
                    "severity": severity,
                    "cve_id": cve_id,
                    "description": description,
                    "summary": summary,
                    "published_at": published_at,
                    "state": state,
                }
                for column, value in updates.items():
                    if value:
                        setattr(existing, column, value)
            else:
                new_ghsa = GHSA(
                    repo=repo,
                    ghsa_id=ghsa_id,
                    # ``severity`` is mapped as a non-optional ``str``; a None
                    # passed in by a caller would otherwise make the insert fail.
                    severity=severity or "",
                    cve_id=cve_id,
                    description=description,
                    summary=summary,
                    published_at=published_at,
                    state=state,
                )
                session.add(new_ghsa)
            session.commit()
        return f"Updated or added GHSA {ghsa_id} for {repo}"

    def get_ghsa(self, repo, ghsa_id):
        """Return the advisory matching ``repo`` and ``ghsa_id`` as a dict, or None."""
        with Session(self.engine) as session:
            existing = session.query(GHSA).filter_by(repo=repo, ghsa_id=ghsa_id).first()
            if not existing:
                return None
            return ghsa_to_dict(existing)

    def get_ghsas(self, repo):
        """Return every stored advisory for ``repo`` as a list of dicts."""
        with Session(self.engine) as session:
            existing = session.query(GHSA).filter_by(repo=repo).all()
            return [ghsa_to_dict(ghsa) for ghsa in existing]

    def store_new_ghsa_summary(
        self,
        repo,
        total_advisories,
        high_severity_count,
        medium_severity_count,
        low_severity_count,
        summary_notes,
    ):
        """Insert the summary for ``repo``, or update the existing one.

        On update the counts are overwritten, while ``summary_notes`` is
        appended to the notes already stored rather than replacing them.

        Returns:
            A confirmation message naming the repository.
        """
        with Session(self.engine) as session:
            existing = session.query(GHSASummary).filter_by(repo=repo).first()
            if existing:
                existing.total_advisories = total_advisories
                existing.high_severity_count = high_severity_count
                existing.medium_severity_count = medium_severity_count
                existing.low_severity_count = low_severity_count
                # Notes accumulate: new text is concatenated to the old text.
                existing.summary_notes = (existing.summary_notes or "") + (summary_notes or "")
            else:
                new_summary = GHSASummary(
                    repo=repo,
                    total_advisories=total_advisories,
                    high_severity_count=high_severity_count,
                    medium_severity_count=medium_severity_count,
                    low_severity_count=low_severity_count,
                    summary_notes=summary_notes,
                )
                session.add(new_summary)
            session.commit()
        return f"Updated or added GHSA summary for {repo}"

    def get_ghsa_summary(self, repo):
        """Return the stored summary for ``repo`` as a dict, or None."""
        with Session(self.engine) as session:
            existing = session.query(GHSASummary).filter_by(repo=repo).first()
            if not existing:
                return None
            return ghsa_summary_to_dict(existing)

    def clear_repo(self, repo):
        """Delete all advisories and the summary stored for ``repo``.

        Returns:
            A confirmation message naming the repository.
        """
        with Session(self.engine) as session:
            session.query(GHSA).filter_by(repo=repo).delete()
            session.query(GHSASummary).filter_by(repo=repo).delete()
            session.commit()
        return f"Cleared GHSA results for repo {repo}"
156+
157+
158+
# Module-level backend instance used by the MCP tools defined in this file.
backend = GHSABackend(MEMORY)
20159

21160
# The advisories contain a lot of information, so we need to filter
22161
# some of it out to avoid exceeding the maximum prompt size.
@@ -26,6 +165,8 @@ def parse_advisory(advisory: dict) -> dict:
26165
"ghsa_id": advisory.get("ghsa_id", ""),
27166
"cve_id": advisory.get("cve_id", ""),
28167
"summary": advisory.get("summary", ""),
168+
"description": advisory.get("description", ""),
169+
"severity": advisory.get("severity", ""),
29170
"published_at": advisory.get("published_at", ""),
30171
"state": advisory.get("state", ""),
31172
}
@@ -70,6 +211,145 @@ async def fetch_GHSA_list(
70211
return results
71212
return json.dumps(results, indent=2)
72213

214+
@mcp.tool()
async def fetch_and_store_GHSA_list(
    owner: str = Field(description="The owner of the repo"),
    repo: str = Field(description="The repository name"),
    return_results: bool = Field(description="Whether to return the fetched results as a JSON string", default=False),
) -> str:
    """Fetch all GitHub Security Advisories (GHSAs) for a specific repository and store them in the database."""
    results = await fetch_GHSA_list_from_gh(owner, repo)
    if isinstance(results, str):
        # A string result (presumably an error message -- confirm against
        # fetch_GHSA_list_from_gh) is passed straight back and nothing is stored.
        return results
    # The normalized repository name is the same for every advisory, so it is
    # computed once instead of on each loop iteration.
    repo_name = process_repo(owner, repo)
    for advisory in results:
        backend.store_new_ghsa(
            repo_name,
            advisory["ghsa_id"],
            advisory["severity"],
            advisory["cve_id"],
            advisory["description"],
            advisory["summary"],
            advisory["published_at"],
            advisory["state"],
        )
    if return_results:
        return json.dumps(results, indent=2)
    return f"Fetched and stored {len(results)} GHSAs for {owner}/{repo}"
237+
238+
@mcp.tool()
def store_new_ghsa(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
    ghsa_id: str = Field(description="The GHSA ID of the advisory"),
    severity: str = Field(description="The severity of the advisory"),
    cve_id: str = Field(description="The CVE ID if available", default=""),
    description: str = Field(description="Description for this advisory", default=""),
    summary: str = Field(description="Summary for this advisory", default=""),
    published_at: str = Field(description="Published timestamp for this advisory", default=""),
    state: str = Field(description="State for this advisory (e.g. published, withdrawn)", default=""),
):
    """Store a GHSA advisory record in the database."""
    # Records are keyed by the processed owner/repo name plus the GHSA ID.
    repo_name = process_repo(owner, repo)
    message = backend.store_new_ghsa(
        repo_name,
        ghsa_id,
        severity,
        cve_id,
        description,
        summary,
        published_at,
        state,
    )
    return message
254+
255+
@mcp.tool()
def get_ghsa_from_db(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
    ghsa_id: str = Field(description="The GHSA ID of the advisory"),
):
    """Get a GHSA advisory record from the database."""
    full_name = process_repo(owner, repo)
    record = backend.get_ghsa(full_name, ghsa_id)
    if record:
        # Found: hand the record back as a JSON string.
        return json.dumps(record)
    return f"Error: No GHSA entry exists in repo: {full_name} and ghsa_id {ghsa_id}"
267+
268+
269+
@mcp.tool()
def get_ghsas_for_repo_from_db(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
):
    """Get all GHSA advisory records for a repository."""
    full_name = process_repo(owner, repo)
    # The result is a JSON array; it is empty when nothing is stored.
    advisories = backend.get_ghsas(full_name)
    return json.dumps(advisories)
276+
277+
@mcp.tool()
def get_ghsa_with_id_from_db(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
    ghsa_id: str = Field(description="The GHSA ID of the advisory"),
):
    """Get a GHSA advisory record with a specific GHSA ID from the database."""
    full_name = process_repo(owner, repo)
    record = backend.get_ghsa(full_name, ghsa_id)
    if record:
        # Found: hand the record back as a JSON string.
        return json.dumps(record)
    return f"Error: No GHSA entry exists in repo: {full_name} and ghsa_id {ghsa_id}"
289+
290+
@mcp.tool()
def store_new_ghsa_summary(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
    total_advisories: int = Field(description="Total number of advisories"),
    high_severity_count: int = Field(description="Number of high severity advisories"),
    medium_severity_count: int = Field(description="Number of medium severity advisories"),
    low_severity_count: int = Field(description="Number of low severity advisories"),
    summary_notes: str = Field(description="Notes for the advisory summary", default=""),
):
    """Store GHSA summary statistics for a repository."""
    full_name = process_repo(owner, repo)
    # When a summary already exists, the backend overwrites the counts and
    # appends these notes to the stored ones.
    message = backend.store_new_ghsa_summary(
        full_name,
        total_advisories,
        high_severity_count,
        medium_severity_count,
        low_severity_count,
        summary_notes,
    )
    return message
309+
310+
311+
@mcp.tool()
def add_ghsa_summary_notes(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
    summary_notes: str = Field(description="New notes for the advisory summary", default=""),
):
    """Append notes to the GHSA summary for a repository."""
    full_name = process_repo(owner, repo)
    current = backend.get_ghsa_summary(full_name)
    if current:
        # Re-submit the stored counts unchanged; the backend appends the new
        # notes to the existing ones when the summary already exists.
        count_keys = (
            "total_advisories",
            "high_severity_count",
            "medium_severity_count",
            "low_severity_count",
        )
        counts = [current[key] for key in count_keys]
        return backend.store_new_ghsa_summary(full_name, *counts, summary_notes)
    return f"Error: No GHSA summary exists in repo: {full_name}"
330+
331+
332+
@mcp.tool()
def get_ghsa_summary(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
):
    """Get the GHSA summary for a repository."""
    full_name = process_repo(owner, repo)
    stored = backend.get_ghsa_summary(full_name)
    if stored:
        # Found: hand the summary back as a JSON string.
        return json.dumps(stored)
    return f"Error: No GHSA summary exists in repo: {full_name}"
343+
344+
345+
@mcp.tool()
def clear_repo(
    owner: str = Field(description="The owner of the GitHub repository"),
    repo: str = Field(description="The name of the GitHub repository"),
):
    """Clear GHSA and GHSA summary records for a repository."""
    full_name = process_repo(owner, repo)
    # The backend removes both the advisories and the summary for this repo.
    message = backend.clear_repo(full_name)
    return message
352+
73353

74354
async def fetch_GHSA_details_from_gh(owner: str, repo: str, ghsa_id: str) -> str | dict:
75355
"""Fetch the details of a repository security advisory."""
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# SPDX-FileCopyrightText: GitHub, Inc.
2+
# SPDX-License-Identifier: MIT
3+
4+
from sqlalchemy import Text
5+
from sqlalchemy.orm import DeclarativeBase, mapped_column, Mapped
6+
from typing import Optional
7+
8+
9+
class Base(DeclarativeBase):
    """Declarative base class shared by the GHSA and GHSASummary models."""
11+
12+
class GHSA(Base):
    """ORM model for one security advisory row in the ``ghsa`` table."""

    __tablename__ = "ghsa"

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    # Required columns.
    ghsa_id: Mapped[str]
    repo: Mapped[str]
    severity: Mapped[str]
    # Optional columns; the two free-text ones use the Text type.
    cve_id: Mapped[Optional[str]] = mapped_column(nullable=True)
    description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    published_at: Mapped[Optional[str]] = mapped_column(nullable=True)
    state: Mapped[Optional[str]] = mapped_column(nullable=True)

    def __repr__(self):
        """Return a debug string listing every column as ``name=value``."""
        column_names = (
            "id",
            "ghsa_id",
            "repo",
            "severity",
            "cve_id",
            "description",
            "summary",
            "published_at",
            "state",
        )
        rendered = ", ".join(f"{name}={getattr(self, name)}" for name in column_names)
        return f"<GHSA({rendered})>"
31+
32+
class GHSASummary(Base):
    """ORM model for one per-repository row in the ``ghsa_summary`` table."""

    __tablename__ = "ghsa_summary"

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    repo: Mapped[str]
    # Advisory counts: the total and three per-severity counts.
    total_advisories: Mapped[int]
    high_severity_count: Mapped[int]
    medium_severity_count: Mapped[int]
    low_severity_count: Mapped[int]
    # Optional free-text notes.
    summary_notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    def __repr__(self):
        """Return a debug string listing every column as ``name=value``."""
        column_names = (
            "id",
            "repo",
            "total_advisories",
            "high_severity_count",
            "medium_severity_count",
            "low_severity_count",
            "summary_notes",
        )
        rendered = ", ".join(f"{name}={getattr(self, name)}" for name in column_names)
        return f"<GHSASummary({rendered})>"

src/seclab_taskflows/prompts/audit/known_security_advisories.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,8 @@ seclab-taskflow-agent:
77
prompt: |
88
## Known Security Advisories for this Repository
99
10-
Fetch the security advisories for {{ globals.repo }} from memcache (stored under the key 'security_advisories_{{ globals.repo }}'). If the value in the memcache is null or an error message, clearly state that no advisories are available and skip advisory analysis. Otherwise, state how many advisories were found.
10+
Fetch the security advisories for {{ globals.repo }} from the GHSASummary and GHSA entries
11+
stored in the database. Do not fetch them from GitHub directly.
12+
If the value in the database is null or an error message, clearly state that no advisories are available and skip advisory analysis.
13+
Otherwise, state how many advisories were found.
1114
Review these advisories and consider them when identifying security risks. If you identify code that is similar to a known advisory pattern, highlight that connection.

src/seclab_taskflows/taskflows/audit/audit_issue_local_iter.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,4 @@ taskflow:
5050
toolboxes:
5151
- seclab_taskflows.toolboxes.repo_context
5252
- seclab_taskflows.toolboxes.local_file_viewer
53-
- seclab_taskflow_agent.toolboxes.memcache
53+
- seclab_taskflows.toolboxes.ghsa

src/seclab_taskflows/taskflows/audit/classify_application_local.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,4 +90,4 @@ taskflow:
9090
toolboxes:
9191
- seclab_taskflows.toolboxes.repo_context
9292
- seclab_taskflows.toolboxes.local_file_viewer
93-
- seclab_taskflow_agent.toolboxes.memcache
93+
- seclab_taskflows.toolboxes.ghsa

0 commit comments

Comments
 (0)