Skip to content

Commit 771df3f

Browse files
authored
Merge pull request #9 from SecrinLabs/typescript-parser
TypeScript parser support
2 parents ffdc89a + c620a30 commit 771df3f

6 files changed

Lines changed: 354 additions & 5 deletions

File tree

packages/parser/core/repository_analyzer.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from packages.parser.models import GraphData, RepoNode
77
from packages.parser.core import BaseLanguageParser
8-
from packages.parser.languages import PythonParser, JavaScriptParser
8+
from packages.parser.languages import PythonParser, JavaScriptParser, TypeScriptParser
99
from packages.parser.utils import (
1010
detect_language,
1111
is_code_file,
@@ -42,8 +42,13 @@ def _initialize_parsers(self):
4242
if js_lang:
4343
self.parsers["javascript"] = JavaScriptParser(js_lang)
4444

45+
# TypeScript
46+
ts_lang = language_registry.get_language("typescript")
47+
if ts_lang:
48+
self.parsers["typescript"] = TypeScriptParser(ts_lang)
49+
4550
# Add more parsers as they become available
46-
# TypeScript, Java, Go, etc.
51+
# Java, Go, etc.
4752

4853
def analyze_repository(self, repo_path: str | Path, cleanup_after: bool = True) -> GraphData:
4954
"""
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Language-specific parsers"""
22
from .python_parser import PythonParser
33
from .javascript_parser import JavaScriptParser
4+
from .typescript_parser import TypeScriptParser
45

5-
__all__ = ["PythonParser", "JavaScriptParser"]
6+
__all__ = ["PythonParser", "JavaScriptParser", "TypeScriptParser"]
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
from tree_sitter import Language, Node, Query, QueryCursor
2+
from pathlib import Path
3+
import hashlib
4+
5+
from packages.parser.core import BaseLanguageParser
6+
from packages.parser.models import (
7+
FileNode,
8+
ClassNode,
9+
FunctionNode,
10+
VariableNode,
11+
PackageNode,
12+
GraphData,
13+
Relationship,
14+
RelationshipType,
15+
)
16+
17+
18+
class TypeScriptParser(BaseLanguageParser):
    """Parser for TypeScript files using tree-sitter.

    Walks a parsed TypeScript syntax tree and records the file, its classes,
    methods, functions, imported packages and module-level variables as nodes
    and relationships in a ``GraphData`` instance.
    """

    # Concrete and abstract classes are distinct node types in the TypeScript
    # grammar, so both have to be listed to see ``abstract class`` declarations.
    _TS_CLASS_QUERY = """
        [
          (class_declaration
            name: (type_identifier)
            body: (class_body)) @class.def
          (abstract_class_declaration
            name: (type_identifier)
            body: (class_body)) @class.def
        ]
    """

    _TS_FUNCTION_QUERY = """
        [
          (function_declaration
            name: (identifier)
            parameters: (formal_parameters)) @func.def
          (variable_declarator
            name: (identifier)
            value: (arrow_function) @arrow.func)
        ]
    """

    _TS_IMPORT_QUERY = """
        (import_statement
          source: (string) @import.source)
    """

    # ``var`` is a variable_declaration; ``let``/``const`` are
    # lexical_declaration nodes and need their own alternative.
    _TS_VARIABLE_QUERY = """
        [
          (variable_declaration
            (variable_declarator
              name: (identifier) @var.name))
          (lexical_declaration
            (variable_declarator
              name: (identifier) @var.name))
        ]
    """

    @property
    def language_name(self) -> str:
        """Return the language identifier, which is also stored on file nodes."""
        return "typescript"

    @property
    def file_extensions(self) -> list[str]:
        """Return the file suffixes this parser claims."""
        # NOTE(review): ".tsx" sources contain JSX. tree-sitter-typescript ships
        # a separate TSX grammar for that; confirm the Language object handed to
        # this parser can actually parse ".tsx" files.
        return [".ts", ".tsx"]

    def _ts_query_captures(self, pattern: str, root_node: Node) -> dict[str, list[Node]]:
        """Run a tree-sitter query and return its captures keyed by capture name."""
        return QueryCursor(Query(self.language, pattern)).captures(root_node)

    @staticmethod
    def _ts_is_module_level(name_node: Node) -> bool:
        """Tell whether a declarator's name node belongs to a module-scope declaration.

        The chain is name -> variable_declarator -> declaration -> scope; an
        ``export`` wrapper around the declaration is looked through.
        """
        declarator = name_node.parent
        declaration = declarator.parent if declarator is not None else None
        scope = declaration.parent if declaration is not None else None
        if scope is not None and scope.type == "export_statement":
            scope = scope.parent
        return scope is not None and scope.type == "program"

    def _create_file_node(self, file_path: Path, content: str, repo_context: dict) -> FileNode:
        """Build the ``FileNode`` describing one source file.

        Args:
            file_path: Path of the file; stored as both ``path`` and ``source_path``.
            content: Full text of the file.
            repo_context: Repository metadata. ``"name"`` is required; ``"sha"``
                and ``"commit_hash"`` are optional.

        Returns:
            A ``FileNode`` carrying the SHA-256 hex digest of the content and
            its line count.
        """
        path_text = str(file_path)
        return FileNode(
            id=self._generate_id(repo_context["name"], path_text, "file"),
            path=path_text,
            language=self.language_name,
            sha=hashlib.sha256(content.encode()).hexdigest(),
            # Equivalent to len(content.split("\n")): a trailing newline counts
            # as one extra (empty) line and empty content counts as one line.
            lines=content.count("\n") + 1,
            source_path=path_text,
            repo_sha=repo_context.get("sha"),
            commit_hash=repo_context.get("commit_hash"),
        )

    def _extract_classes(self, root_node: Node, content: str, file_node: FileNode,
                         graph_data: GraphData, repo_context: dict):
        """Extract TypeScript class declarations, including abstract classes.

        For every class a ``ClassNode`` is added, linked from the file with
        ``CONTAINS_CLASS``, and its methods are extracted.

        Args:
            root_node: Root of the parsed syntax tree.
            content: Source text the tree was parsed from.
            file_node: Node of the file being processed.
            graph_data: Graph that receives the nodes and relationships.
            repo_context: Repository metadata (``"name"`` required).
        """
        captures = self._ts_query_captures(self._TS_CLASS_QUERY, root_node)

        for class_def in captures.get("class.def", []):
            name_node = class_def.child_by_field_name("name")
            if name_node is None:
                continue

            class_name = self._get_node_text(name_node, content)
            start_line = self._get_line_number(class_def)
            end_line = class_def.end_point[0] + 1  # end_point rows are 0-based

            class_id = self._generate_id(
                repo_context["name"],
                str(file_node.path),
                "class",
                class_name,
            )

            class_obj = ClassNode(
                id=class_id,
                name=class_name,
                start_line=start_line,
                end_line=end_line,
                source_path=file_node.path,
                repo_sha=repo_context.get("sha"),
                commit_hash=repo_context.get("commit_hash"),
                snippet=self._get_snippet(content, start_line, end_line),
            )
            graph_data.add_node(class_obj)

            graph_data.add_relationship(Relationship(
                source_id=file_node.id,
                target_id=class_id,
                type=RelationshipType.CONTAINS_CLASS,
            ))

            self._extract_methods(class_def, content, file_node, class_obj, graph_data, repo_context)

    def _extract_methods(self, class_node: Node, content: str, file_node: FileNode,
                         class_obj: ClassNode, graph_data: GraphData, repo_context: dict):
        """Extract methods from a TypeScript class.

        Only direct ``method_definition`` children of the class body are
        recorded. Each becomes a ``FunctionNode`` with ``is_method=True``,
        linked from the class (``HAS_METHOD``) and to the file (``DEFINED_IN``).

        Args:
            class_node: Syntax-tree node of the class declaration.
            content: Source text the tree was parsed from.
            file_node: Node of the file being processed.
            class_obj: Graph node of the owning class.
            graph_data: Graph that receives the nodes and relationships.
            repo_context: Repository metadata (``"name"`` required).
        """
        body = class_node.child_by_field_name("body")
        if body is None:
            return

        for member in body.children:
            if member.type != "method_definition":
                continue

            name_node = member.child_by_field_name("name")
            params_node = member.child_by_field_name("parameters")
            if name_node is None or params_node is None:
                continue

            method_name = self._get_node_text(name_node, content)
            params = self._get_node_text(params_node, content)

            start_line = self._get_line_number(member)
            end_line = member.end_point[0] + 1  # end_point rows are 0-based

            method_id = self._generate_id(
                repo_context["name"],
                str(file_node.path),
                "method",
                class_obj.name,
                method_name,
            )

            graph_data.add_node(FunctionNode(
                id=method_id,
                name=method_name,
                signature=f"{method_name}{params}",
                start_line=start_line,
                end_line=end_line,
                is_method=True,
                source_path=file_node.path,
                repo_sha=repo_context.get("sha"),
                commit_hash=repo_context.get("commit_hash"),
                snippet=self._get_snippet(content, start_line, end_line),
            ))

            graph_data.add_relationship(Relationship(
                source_id=class_obj.id,
                target_id=method_id,
                type=RelationshipType.HAS_METHOD,
            ))

            graph_data.add_relationship(Relationship(
                source_id=method_id,
                target_id=file_node.id,
                type=RelationshipType.DEFINED_IN,
            ))

    def _ts_add_function_node(self, func_name: str, signature: str, span_node: Node,
                              content: str, file_node: FileNode,
                              graph_data: GraphData, repo_context: dict):
        """Add one non-method ``FunctionNode`` plus its file relationships.

        ``span_node`` is the syntax node whose extent supplies the start line,
        end line and snippet.
        """
        start_line = self._get_line_number(span_node)
        end_line = span_node.end_point[0] + 1  # end_point rows are 0-based

        func_id = self._generate_id(
            repo_context["name"],
            str(file_node.path),
            "function",
            func_name,
        )

        graph_data.add_node(FunctionNode(
            id=func_id,
            name=func_name,
            signature=signature,
            start_line=start_line,
            end_line=end_line,
            is_method=False,
            source_path=file_node.path,
            repo_sha=repo_context.get("sha"),
            commit_hash=repo_context.get("commit_hash"),
            snippet=self._get_snippet(content, start_line, end_line),
        ))

        graph_data.add_relationship(Relationship(
            source_id=file_node.id,
            target_id=func_id,
            type=RelationshipType.CONTAINS_FUNCTION,
        ))

        graph_data.add_relationship(Relationship(
            source_id=func_id,
            target_id=file_node.id,
            type=RelationshipType.DEFINED_IN,
        ))

    def _extract_functions(self, root_node: Node, content: str, file_node: FileNode,
                           graph_data: GraphData, repo_context: dict):
        """Extract named TypeScript functions.

        Covers ``function`` declarations and arrow functions bound to a
        variable (``const f = (...) => ...``). The query is not limited to
        module scope, so nested functions are recorded as well. Function ids
        are derived from the name alone, so only the first function with a
        given name is recorded per file; declarations are processed before
        arrow functions.

        Args:
            root_node: Root of the parsed syntax tree.
            content: Source text the tree was parsed from.
            file_node: Node of the file being processed.
            graph_data: Graph that receives the nodes and relationships.
            repo_context: Repository metadata (``"name"`` required).
        """
        captures = self._ts_query_captures(self._TS_FUNCTION_QUERY, root_node)
        seen_names: set[str] = set()

        for func_def in captures.get("func.def", []):
            name_node = func_def.child_by_field_name("name")
            params_node = func_def.child_by_field_name("parameters")
            if name_node is None or params_node is None:
                continue

            func_name = self._get_node_text(name_node, content)
            if func_name in seen_names:
                continue
            seen_names.add(func_name)

            params = self._get_node_text(params_node, content)
            self._ts_add_function_node(
                func_name, f"{func_name}{params}", func_def,
                content, file_node, graph_data, repo_context,
            )

        for arrow in captures.get("arrow.func", []):
            declarator = arrow.parent
            name_node = (
                declarator.child_by_field_name("name") if declarator is not None else None
            )
            if name_node is None:
                continue

            func_name = self._get_node_text(name_node, content)
            if func_name in seen_names:
                continue
            seen_names.add(func_name)

            # Parenthesised parameter lists use the "parameters" field; the
            # short form ``x => ...`` exposes a bare identifier as "parameter".
            params_node = arrow.child_by_field_name("parameters")
            if params_node is not None:
                params = self._get_node_text(params_node, content)
            else:
                single = arrow.child_by_field_name("parameter")
                params = f"({self._get_node_text(single, content)})" if single is not None else "()"

            # Span the whole declarator so the snippet includes the bound name.
            self._ts_add_function_node(
                func_name, f"{func_name}{params}", declarator,
                content, file_node, graph_data, repo_context,
            )

    def _extract_imports(self, root_node: Node, content: str, file_node: FileNode,
                         graph_data: GraphData, repo_context: dict):
        """Extract TypeScript import statements as package dependencies.

        The module specifier is reduced to a package name (``@scope/name`` for
        scoped packages, otherwise the first path segment). A ``PackageNode``
        is created the first time a package is seen in ``graph_data`` and an
        ``IMPORTS`` relationship is added from the file for every import
        statement. Relative (``./``, ``../``) and absolute (``/``) specifiers
        are skipped because they do not name a package.

        Args:
            root_node: Root of the parsed syntax tree.
            content: Source text the tree was parsed from.
            file_node: Node of the file being processed.
            graph_data: Graph that receives the nodes and relationships.
            repo_context: Repository metadata; not used by this method.
        """
        captures = self._ts_query_captures(self._TS_IMPORT_QUERY, root_node)

        for source_node in captures.get("import.source", []):
            import_path = self._get_node_text(source_node, content).strip('"').strip("'")

            if import_path.startswith((".", "/")):
                # Local file import, not a package.
                continue

            parts = import_path.split("/")
            if import_path.startswith("@"):
                # Scoped package: keep "@scope/name".
                package_name = "/".join(parts[:2]) if len(parts) >= 2 else import_path
            else:
                package_name = parts[0]

            if not package_name:
                continue

            package_id = self._generate_id("package", package_name)

            # any() stops at the first hit instead of materialising a list.
            already_known = any(
                getattr(existing, "id", None) == package_id for existing in graph_data.nodes
            )
            if not already_known:
                graph_data.add_node(PackageNode(
                    id=package_id,
                    name=package_name,
                    version="unknown",
                ))

            graph_data.add_relationship(Relationship(
                source_id=file_node.id,
                target_id=package_id,
                type=RelationshipType.IMPORTS,
            ))

    def _extract_variables(self, root_node: Node, content: str, file_node: FileNode,
                           graph_data: GraphData, repo_context: dict):
        """Extract top-level variable declarations.

        Handles ``var``, ``let`` and ``const`` declarations (optionally
        exported) whose declared name is a plain identifier. Declarations
        nested inside functions, classes or blocks are ignored, since every
        node produced here is tagged ``kind="global"``.

        Args:
            root_node: Root of the parsed syntax tree.
            content: Source text the tree was parsed from.
            file_node: Node of the file being processed.
            graph_data: Graph that receives the nodes.
            repo_context: Repository metadata (``"name"`` required).
        """
        captures = self._ts_query_captures(self._TS_VARIABLE_QUERY, root_node)

        for name_node in captures.get("var.name", []):
            if not self._ts_is_module_level(name_node):
                continue

            var_name = self._get_node_text(name_node, content)
            start_line = self._get_line_number(name_node)

            # The line number is part of the id so that a name declared more
            # than once in a file still yields distinct nodes.
            var_id = self._generate_id(
                repo_context["name"],
                str(file_node.path),
                "variable",
                var_name,
                str(start_line),
            )

            graph_data.add_node(VariableNode(
                id=var_id,
                name=var_name,
                kind="global",
                start_line=start_line,
                source_path=file_node.path,
                repo_sha=repo_context.get("sha"),
                commit_hash=repo_context.get("commit_hash"),
            ))

packages/parser/utils/language_config.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
except ImportError:
1313
tree_sitter_javascript = None # type: ignore[assignment]
1414

15-
# Add more as needed: tree_sitter_typescript, tree_sitter_java, etc.
15+
try:
16+
import tree_sitter_typescript # type: ignore[import-untyped]
17+
except ImportError:
18+
tree_sitter_typescript = None # type: ignore[assignment]
19+
20+
# Add more as needed: tree_sitter_java, etc.
1621

1722

1823
class LanguageRegistry:
@@ -40,6 +45,17 @@ def _load_default_languages(self):
4045
else:
4146
print("Warning: tree_sitter_javascript not installed. Install with: pip install tree-sitter-javascript")
4247

48+
if tree_sitter_typescript is not None:
49+
try:
50+
# Load TypeScript language
51+
self._languages["typescript"] = Language(tree_sitter_typescript.language_typescript()) # type: ignore[attr-defined]
52+
# We could also load TSX if needed, maybe as "tsx" language
53+
# self._languages["tsx"] = Language(tree_sitter_typescript.language_tsx())
54+
except Exception as e:
55+
print(f"Warning: Could not load TypeScript language: {e}")
56+
else:
57+
print("Warning: tree_sitter_typescript not installed. Install with: pip install tree-sitter-typescript")
58+
4359
# Add more languages here as needed
4460
# self._languages["typescript"] = Language(tree_sitter_typescript.language_typescript())
4561
# self._languages["java"] = Language(tree_sitter_java.language())

0 commit comments

Comments
 (0)