forked from Aunsiels/pyformlang
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrecursive_decent_parser.py
More file actions
138 lines (119 loc) · 4.62 KB
/
recursive_decent_parser.py
File metadata and controls
138 lines (119 loc) · 4.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""A recursive decent parser of CFG."""
from typing import List, Iterable, Tuple, Optional, Hashable
from .cfg import CFG
from .parse_tree import ParseTree, NotParsableError
from ..objects.cfg_objects import CFGObject, Variable, Terminal, Epsilon
from ..objects.cfg_objects.utils import to_terminal
ExpansionSymbol = Tuple[CFGObject, ParseTree]
Expansion = List[ExpansionSymbol]
def _get_index_to_extend(current_expansion: Expansion, left: bool) \
-> Tuple[int, Optional[ExpansionSymbol]]:
order = enumerate(current_expansion)
if not left:
order = reversed(list(order))
for i, symbol in order:
if isinstance(symbol[0], Variable):
return i, symbol
return -1, None
class RecursiveDecentParser:
"""A recursive Top-Down parser of CFG.
Parameters
----------
cfg:
A Context-Free Grammar to parse.
"""
def __init__(self, cfg: CFG) -> None:
"""Initializes the parser."""
self._cfg = cfg
def get_parse_tree(self, word: Iterable[Hashable], left: bool = True) \
-> ParseTree:
"""Gets a parse tree of the given word.
Parameters
----------
word:
The word to parse.
left:
If we do the recursive from the left or the right
(left by default).
Returns
-------
The parse tree of the given word.
Raises
------
NotParsableError
When the word cannot be parsed.
"""
if not self._cfg.start_symbol:
raise NotParsableError
word = [to_terminal(x) for x in word if x != Epsilon()]
parse_tree = ParseTree(self._cfg.start_symbol)
starting_expansion: Expansion = [(self._cfg.start_symbol, parse_tree)]
if self._get_parse_tree_sub(word, starting_expansion, left):
return parse_tree
raise NotParsableError
def _match(self,
word: List[Terminal],
current_expansion: Expansion,
idx_word: int = 0,
idx_current_expansion: int = 0) -> bool:
if idx_word == len(word) and \
idx_current_expansion == len(current_expansion):
return True
if idx_word > len(word):
return False
if idx_current_expansion == len(current_expansion):
return False
if isinstance(current_expansion[idx_current_expansion][0], Variable):
return self._match(word, current_expansion, idx_word + 1,
idx_current_expansion) or \
self._match(word, current_expansion, idx_word,
idx_current_expansion + 1)
if idx_word < len(word) and \
word[idx_word] == current_expansion[idx_current_expansion][0]:
return self._match(word, current_expansion, idx_word + 1,
idx_current_expansion + 1)
return False
def _get_parse_tree_sub(self,
word: List[Terminal],
current_expansion: Expansion,
left: bool = True) -> bool:
if not self._match(word, current_expansion):
return False
extend_idx, to_expand = _get_index_to_extend(current_expansion, left)
# Nothing else to extend, we are done
if to_expand is None:
return True
begin = current_expansion[:extend_idx]
end = current_expansion[extend_idx + 1:]
for production in self._cfg.productions:
if production.head == to_expand[0]:
replacement = [(x, ParseTree(x)) for x in production.body]
new_expansion = begin + replacement + end
if self._get_parse_tree_sub(word, new_expansion, left):
to_expand[1].sons = [x[1] for x in replacement]
return True
return False
def is_parsable(self, word: Iterable[Hashable], left: bool = True) -> bool:
"""Whether the given word is parsable or not.
Parameters
----------
word:
The word to parse.
left:
If we do the recursive from the left or the right
(left by default).
Returns
-------
Whether the word is parsable.
Raises
------
RecursionError
If the recursion goes too deep. This error occurs because some
the algorithm is not guaranteed to terminate with left/right
recursive grammars.
"""
try:
self.get_parse_tree(word, left)
except NotParsableError:
return False
return True