forked from Aunsiels/pyformlang
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathregex_objects.py
More file actions
198 lines (146 loc) · 4.96 KB
/
regex_objects.py
File metadata and controls
198 lines (146 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
"""
Representation of some objects used in regex.
"""
from typing import List, Iterable
from abc import abstractmethod
from ..cfg_objects.production import Production
from ..cfg_objects.utils import to_variable, to_terminal
class Node:
    """ Base element of the tree representation of a regex

    Parameters
    ----------
    value : str
        The value carried by the node
    """

    # NOTE: this class does not derive from abc.ABC, so the
    # @abstractmethod markers below do not block instantiation; a subclass
    # that forgets an override is only caught by the explicit
    # NotImplementedError raised when the method is called.

    def __init__(self, value: str) -> None:
        self._value = value

    @property
    def value(self) -> str:
        """ Read-only access to the value carried by the node

        Returns
        -------
        value : str
            The value of the node
        """
        return self._value

    @abstractmethod
    def get_str_repr(self, sons_repr: Iterable[str]) -> str:
        """ Build the string representation of the node

        Parameters
        ----------
        sons_repr : iterable of str
            The representations of the sons of this node

        Returns
        -------
        repr : str
            The representation of this node

        Raises
        ------
        NotImplementedError
            Always, in this base class
        """
        raise NotImplementedError

    @abstractmethod
    def get_cfg_rules(self,
                      current_symbol: str,
                      sons: Iterable[str]) -> List[Production]:
        """ Build the context-free grammar productions for the node

        Parameters
        ----------
        current_symbol : str
            The symbol associated with this node
        sons : iterable of str
            The symbols associated with the sons of this node

        Returns
        -------
        rules : list of Production
            The productions representing this node

        Raises
        ------
        NotImplementedError
            Always, in this base class
        """
        raise NotImplementedError
class Operator(Node):
    """ Generic operator node of a regex tree

    Both representation methods raise NotImplementedError in this class.

    Parameters
    ----------
    value : str
        The value of the operator
    """

    def __repr__(self) -> str:
        # %s formats through str(), exactly like an explicit str() call.
        return "Operator(%s)" % (self._value,)

    def get_str_repr(self, sons_repr: Iterable[str]) -> str:
        """ Build the string representation of the operator

        Parameters
        ----------
        sons_repr : iterable of str
            The representations of the operands

        Raises
        ------
        NotImplementedError
            Always, in this class
        """
        raise NotImplementedError

    def get_cfg_rules(self,
                      current_symbol: str,
                      sons: Iterable[str]) -> List[Production]:
        """ Build the context-free grammar productions for the operator

        Parameters
        ----------
        current_symbol : str
            The symbol associated with this operator
        sons : iterable of str
            The symbols associated with the operands

        Raises
        ------
        NotImplementedError
            Always, in this class
        """
        raise NotImplementedError
class Symbol(Node):
    """ Leaf node of a regex tree holding a symbol

    Parameters
    ----------
    value : str
        The value of the symbol
    """

    def __repr__(self) -> str:
        # %s formats through str(), exactly like an explicit str() call.
        return "Symbol(%s)" % (self._value,)

    def get_str_repr(self, sons_repr: Iterable[str]) -> str:
        """ Build the string representation of the symbol

        Parameters
        ----------
        sons_repr : iterable of str
            Not used by this method

        Returns
        -------
        repr : str
            The value of the symbol converted with str()
        """
        return str(self.value)

    def get_cfg_rules(self,
                      current_symbol: str,
                      sons: Iterable[str]) -> List[Production]:
        """ Build the context-free grammar production for the symbol

        Parameters
        ----------
        current_symbol : str
            The symbol used as head of the production
        sons : iterable of str
            Not used by this method

        Returns
        -------
        rules : list of Production
            A single production from the variable built out of
            current_symbol to the terminal built out of the value
        """
        head = to_variable(current_symbol)
        body = [to_terminal(self.value)]
        return [Production(head, body)]
class Concatenation(Operator):
    """ Operator node representing a concatenation

    The node value is always the string "Concatenation".
    """

    def __init__(self) -> None:
        super().__init__("Concatenation")

    def get_str_repr(self, sons_repr: Iterable[str]) -> str:
        """ Build the string representation of the concatenation

        Parameters
        ----------
        sons_repr : iterable of str
            The representations of the operands

        Returns
        -------
        repr : str
            The operands joined with "." and wrapped in parentheses
        """
        joined = ".".join(sons_repr)
        return "(" + joined + ")"

    def get_cfg_rules(self,
                      current_symbol: str,
                      sons: Iterable[str]) -> List[Production]:
        """ Build the context-free grammar production for the concatenation

        Parameters
        ----------
        current_symbol : str
            The symbol used as head of the production
        sons : iterable of str
            The symbols of the operands, in order

        Returns
        -------
        rules : list of Production
            A single production whose body lists one variable per operand
        """
        head = to_variable(current_symbol)
        body = list(map(to_variable, sons))
        return [Production(head, body)]
class Union(Operator):
    """ Operator node representing a union

    The node value is always the string "Union".
    """

    def __init__(self) -> None:
        super().__init__("Union")

    def get_str_repr(self, sons_repr: Iterable[str]) -> str:
        """ Build the string representation of the union

        Parameters
        ----------
        sons_repr : iterable of str
            The representations of the alternatives

        Returns
        -------
        repr : str
            The alternatives joined with "|" and wrapped in parentheses
        """
        joined = "|".join(sons_repr)
        return "(" + joined + ")"

    def get_cfg_rules(self,
                      current_symbol: str,
                      sons: Iterable[str]) -> List[Production]:
        """ Build the context-free grammar productions for the union

        Parameters
        ----------
        current_symbol : str
            The symbol used as head of every production
        sons : iterable of str
            The symbols of the alternatives

        Returns
        -------
        rules : list of Production
            One production per alternative, each with a single variable
            as body
        """
        rules = []
        for son in sons:
            # The head is rebuilt for every alternative so that each
            # production receives its own head object.
            head = to_variable(current_symbol)
            rules.append(Production(head, [to_variable(son)]))
        return rules
class KleeneStar(Operator):
    """ Operator node representing a Kleene star

    The node value is always the string "Kleene Star".
    """

    def __init__(self) -> None:
        super().__init__("Kleene Star")

    def get_str_repr(self, sons_repr: Iterable[str]) -> str:
        """ Build the string representation of the Kleene star

        Parameters
        ----------
        sons_repr : iterable of str
            The representations of the operands

        Returns
        -------
        repr : str
            The operands joined with ".", wrapped in parentheses and
            followed by "*"
        """
        joined = ".".join(sons_repr)
        return "(" + joined + ")*"

    def get_cfg_rules(self,
                      current_symbol: str,
                      sons: Iterable[str]) -> List[Production]:
        """ Build the context-free grammar productions for the Kleene star

        Parameters
        ----------
        current_symbol : str
            The symbol used as head of every production
        sons : iterable of str
            The symbols of the operands

        Returns
        -------
        rules : list of Production
            Three productions, in this order: head to the empty body,
            head to two copies of the head, head to the operands
        """
        # Zero repetition: the head derives the empty body.
        empty_rule = Production(to_variable(current_symbol), [])
        # Repetition: the head derives itself twice.
        repeat_rule = Production(
            to_variable(current_symbol),
            [to_variable(current_symbol), to_variable(current_symbol)])
        # Single occurrence: the head derives the operands.
        single_rule = Production(
            to_variable(current_symbol),
            [to_variable(son) for son in sons])
        return [empty_rule, repeat_rule, single_rule]
class Epsilon(Symbol):
    """ Symbol node representing epsilon

    The node value is always the string "Epsilon".
    """

    def __init__(self) -> None:
        super().__init__("Epsilon")

    def get_str_repr(self, sons_repr: Iterable[str]) -> str:
        """ Build the string representation of epsilon

        Parameters
        ----------
        sons_repr : iterable of str
            Not used by this method

        Returns
        -------
        repr : str
            Always "$"
        """
        return "$"

    def get_cfg_rules(self,
                      current_symbol: str,
                      sons: Iterable[str]) -> List[Production]:
        """ Build the context-free grammar production for epsilon

        Parameters
        ----------
        current_symbol : str
            The symbol used as head of the production
        sons : iterable of str
            Not used by this method

        Returns
        -------
        rules : list of Production
            A single production from the head to the empty body
        """
        head = to_variable(current_symbol)
        return [Production(head, [])]
class Empty(Symbol):
    """ Symbol node representing the empty symbol

    The node value is always the string "Empty".
    """

    def __init__(self) -> None:
        super().__init__("Empty")

    def get_cfg_rules(self,
                      current_symbol: str,
                      sons: Iterable[str]) -> List[Production]:
        """ Build the context-free grammar productions for the empty symbol

        Parameters
        ----------
        current_symbol : str
            Not used by this method
        sons : iterable of str
            Not used by this method

        Returns
        -------
        rules : list of Production
            Always a new empty list: no production is generated
        """
        no_rules = []
        return no_rules
class MisformedRegexError(Exception):
    """ Error raised for a misformed regex

    Parameters
    ----------
    message : str
        The description of the problem
    regex : str
        The regex concerned by the problem; it is appended to the message
        and kept in the _regex attribute
    """

    def __init__(self, message: str, regex: str) -> None:
        full_message = message + " Regex: " + regex
        super().__init__(full_message)
        self._regex = regex