@@ -26,13 +26,35 @@
 everything = []
 bad_intel = set()  # unclean intel urls
 bad_scripts = set()  # unclean javascript file urls
-datasets = [files, intel, robots, custom, failed, scripts, external, fuzzable, endpoints, keys]
+datasets = [
+    files,
+    intel,
+    robots,
+    custom,
+    failed,
+    scripts,
+    external,
+    fuzzable,
+    endpoints,
+    keys,
+]
 dataset_names = [
-    'files', 'intel', 'robots', 'custom', 'failed', 'scripts', 'external', 'fuzzable', 'endpoints', 'keys'
+    "files",
+    "intel",
+    "robots",
+    "custom",
+    "failed",
+    "scripts",
+    "external",
+    "fuzzable",
+    "endpoints",
+    "keys",
 ]
 
 
-def execute_all(client: httpx.Client, link: str, *, display_status: bool = False) -> None:
+def execute_all(
+    client: httpx.Client, link: str, *, display_status: bool = False
+) -> None:
     """Initialise datasets and functions to retrieve data, and execute
     each for a given link.
 
@@ -43,141 +65,148 @@ def execute_all(client: httpx.Client, link: str, *, display_status: bool = False
     """
 
     resp = client.get(url=link)
-    soup = BeautifulSoup(resp.text, 'html.parser')
+    soup = BeautifulSoup(resp.text, "html.parser")
     validation_functions = [
-        get_robots_txt, get_dot_git, get_dot_svn, get_dot_git, get_intel, get_dot_htaccess, get_bitcoin_address
+        get_robots_txt,
+        get_dot_git,
+        get_dot_svn,
+        get_intel,
+        get_dot_htaccess,
+        get_bitcoin_address,
     ]
     for validate_func in validation_functions:
         try:
             validate_func(client, link, resp)
         except Exception as e:
             logging.debug(e)
-            cprint('Error', 'red')
+            cprint("Error", "red")
 
     display_webpage_description(soup)
     # display_headers(response)
 
 
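A minimal sketch of how execute_all might be driven, assuming this module is importable as `info` and that a Tor SOCKS proxy listens on the address hard-coded elsewhere in this file (the link is a placeholder):

    import httpx

    import info  # hypothetical name for the module shown in this diff

    link = "http://example.onion/"  # placeholder target
    # `proxies=` matches the httpx keyword used later in this diff.
    with httpx.Client(proxies="socks5://127.0.0.1:9050", timeout=30) as client:
        info.execute_all(client, link)
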
 def display_headers(response):
-    """ Print all headers in response object.
+    """Print all headers in response object.
 
     Args:
         response (object): Response object.
     """
-    print('''
+    print(
+        """
     RESPONSE HEADERS
     __________________
-    ''')
+    """
+    )
     for key, val in response.headers.items():
-        print('*', key, ':', val)
+        print("*", key, ":", val)
 
 
 def get_robots_txt(client: httpx.Client, target: str, response: str) -> None:
-    """ Check link for Robot.txt, and if found, add link to robots dataset.
+    """Check link for robots.txt, and if found, add link to robots dataset.
 
     Args:
         target (str): URL to be checked.
         response (object): Response object containing data to check.
     """
-    cprint("[*]Checking for Robots.txt", 'yellow')
+    cprint("[*]Checking for Robots.txt", "yellow")
     url = target
     target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
     client.get(target + "robots.txt")
     print(target + "robots.txt")
-    matches = re.findall(r'Allow: (.*)|Disallow: (.*)', response)
+    matches = re.findall(r"Allow: (.*)|Disallow: (.*)", response)
     for match in matches:
-        match = ''.join(match)
-        if '*' not in match:
+        match = "".join(match)
+        if "*" not in match:
             url = target + match
             robots.add(url)
-    cprint("Robots.txt found", 'blue')
+    cprint("Robots.txt found", "blue")
     print(robots)
 
 
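One thing to flag here: the result of `client.get(target + "robots.txt")` is discarded and the regex is applied to the `response` argument instead. A sketch of what parsing the fetched robots.txt itself might look like (the function name is illustrative):

    import re

    import httpx

    def parse_robots(client: httpx.Client, base: str) -> set:
        """Collect concrete Allow/Disallow paths from base/robots.txt."""
        text = client.get(base + "robots.txt").text
        rules = set()
        for allow, disallow in re.findall(r"Allow: (.*)|Disallow: (.*)", text):
            path = (allow or disallow).strip()
            if path and "*" not in path:  # skip wildcard rules, as above
                rules.add(base + path)
        return rules
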
 def get_intel(client: httpx.Client, url: str, response: str) -> None:
-    """ Check link for intel, and if found, add link to intel dataset,
+    """Check link for intel, and if found, add link to intel dataset,
     including but not limited to website accounts and AWS buckets.
 
     Args:
         target (str): URL to be checked.
         response (object): Response object containing data to check.
     """
     intel = set()
-    regex = r'''([\w\.-]+s[\w\.-]+\.amazonaws\.com)|([\w\.-]+@[\w\.-]+\.[\.\w]+)'''
+    regex = r"""([\w\.-]+s[\w\.-]+\.amazonaws\.com)|([\w\.-]+@[\w\.-]+\.[\.\w]+)"""
     matches = re.findall(regex, response)
     print("Intel\n--------\n\n")
     for match in matches:
         intel.add(match)
 
 
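Note that the local `intel = set()` shadows the module-level `intel` dataset, so matches collected here never reach `writer`. For reference, what the regex captures on a made-up sample:

    import re

    regex = r"([\w\.-]+s[\w\.-]+\.amazonaws\.com)|([\w\.-]+@[\w\.-]+\.[\.\w]+)"
    sample = "contact admin@example.onion or fetch logs.s3.amazonaws.com"
    # Each tuple holds (aws_bucket, email); one side is empty per match.
    print(re.findall(regex, sample))
    # [('', 'admin@example.onion'), ('logs.s3.amazonaws.com', '')]
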
 def get_dot_git(client: httpx.Client, target: str, response: str) -> None:
-    """ Check link for .git folders exposed on public domain.
+    """Check link for .git folders exposed on public domain.
 
     Args:
         target (str): URL to be checked.
         response (object): Response object containing data to check.
     """
-    cprint("[*]Checking for .git folder", 'yellow')
+    cprint("[*]Checking for .git folder", "yellow")
     url = target
     target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
     resp = client.get(target + "/.git/config")
     if not resp.text.__contains__("404"):
-        cprint("Alert!", 'red')
-        cprint(".git folder exposed publicly", 'red')
+        cprint("Alert!", "red")
+        cprint(".git folder exposed publicly", "red")
     else:
-        cprint("NO .git folder found", 'blue')
+        cprint("NO .git folder found", "blue")
 
 
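The `"404" in resp.text` heuristic fires on any body that merely mentions 404. A status-code check is the more conventional test; a sketch, with the `[core]` marker as an assumption about typical git config contents:

    import httpx

    def git_config_exposed(client: httpx.Client, base: str) -> bool:
        """Return True if base/.git/config looks publicly readable."""
        resp = client.get(base + ".git/config")
        # Rely on the HTTP status rather than scanning the body for "404".
        return resp.status_code == 200 and "[core]" in resp.text
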
 def get_bitcoin_address(client: httpx.Client, target: str, response: str) -> None:
-    """ Check link for Bitcoin addresses, and if found, print.
+    """Check link for Bitcoin addresses, and if found, print.
 
     Args:
         target (str): URL to be checked.
         response (object): Response object containing data to check.
     """
-    bitcoins = re.findall(r'^[13][a-km-zA-HJ-NP-Z1-9]{25,34}$', response)
+    bitcoins = re.findall(r"^[13][a-km-zA-HJ-NP-Z1-9]{25,34}$", response)
     print("BTC FOUND: ", len(bitcoins))
     for bitcoin in bitcoins:
         print("BTC: ", bitcoin)
 
 
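As written, the `^`/`$` anchors only match when the entire response body is a single address, since re.findall is not given re.MULTILINE. If the intent is to find addresses anywhere in a page, word boundaries are probably closer:

    import re

    # Sketch: legacy (P2PKH/P2SH) addresses anywhere in a body.
    BTC_RE = re.compile(r"\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b")
    body = "donate: 1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2 thanks"
    print(BTC_RE.findall(body))  # ['1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2']
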
 def get_dot_svn(client: httpx.Client, target: str, response: str) -> None:
-    """ Check link for .svn folders exposed on public domain=.
+    """Check link for .svn folders exposed on public domain.
 
     Args:
         target (str): URL to be checked.
         response (object): Response object containing data to check.
     """
-    cprint("[*]Checking for .svn folder", 'yellow')
+    cprint("[*]Checking for .svn folder", "yellow")
     url = target
     target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
-    resp = httpx.get(target + "/.svn/entries", proxies='socks5://127.0.0.1:9050')
+    resp = httpx.get(target + "/.svn/entries", proxies="socks5://127.0.0.1:9050")
     if not resp.text.__contains__("404"):
-        cprint("Alert!", 'red')
-        cprint(".SVN folder exposed publicly", 'red')
+        cprint("Alert!", "red")
+        cprint(".SVN folder exposed publicly", "red")
     else:
-        cprint("NO .SVN folder found", 'blue')
+        cprint("NO .SVN folder found", "blue")
 
 
 def get_dot_htaccess(client: httpx.Client, target: str, response: str) -> None:
-    """ Check link for .htaccess files on public domain.
+    """Check link for .htaccess files on public domain.
 
     Args:
         target (str): URL to be checked.
         response (object): Response object containing data to check.
     """
-    cprint("[*]Checking for .htaccess", 'yellow')
+    cprint("[*]Checking for .htaccess", "yellow")
     url = target
     target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
-    resp = httpx.get(target + "/.htaccess", proxies='socks5://127.0.0.1:9050')
+    resp = httpx.get(target + "/.htaccess", proxies="socks5://127.0.0.1:9050")
     if resp.text.__contains__("403"):
-        cprint("403 Forbidden", 'blue')
+        cprint("403 Forbidden", "blue")
     elif not resp.text.__contains__("404") or resp.text.__contains__("500"):
-        cprint("Alert!!", 'blue')
-        cprint(".htaccess file found!", 'blue')
+        cprint("Alert!!", "blue")
+        cprint(".htaccess file found!", "blue")
     else:
-        cprint("Response", 'blue')
-        cprint(resp, 'blue')
+        cprint("Response", "blue")
+        cprint(resp, "blue")
 
 
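Worth double-checking the elif above: `not` binds only to the first test, so any body mentioning "500" raises the alert even when "404" is also present. If the intended reading is "neither 404 nor 500", that condition would look like:

    def htaccess_interesting(resp_text: str) -> bool:
        """Sketch of the 'neither 404 nor 500' reading of the condition."""
        return "404" not in resp_text and "500" not in resp_text
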
 def display_webpage_description(soup: BeautifulSoup) -> None:
@@ -186,8 +215,8 @@ def display_webpage_description(soup: BeautifulSoup) -> None:
     Args:
         soup (object): Processed HTML object.
     """
-    cprint("[*]Checking for meta tag", 'yellow')
-    metatags = soup.find_all('meta')
+    cprint("[*]Checking for meta tag", "yellow")
+    metatags = soup.find_all("meta")
     for meta in metatags:
         print("Meta : ", meta)
 
@@ -202,11 +231,11 @@ def writer(datasets, dataset_names, output_dir):
     """
     for dataset, dataset_name in zip(datasets, dataset_names):
         if dataset:
-            filepath = output_dir + '/' + dataset_name + '.txt'
+            filepath = output_dir + "/" + dataset_name + ".txt"
 
-            with open(filepath, 'w+', encoding='utf8') as f:
-                f.write(str('\n'.join(dataset)))
-                f.write('\n')
+            with open(filepath, "w+", encoding="utf8") as f:
+                f.write(str("\n".join(dataset)))
+                f.write("\n")
         # else:
         #     with open(filepath, 'w+') as f:
         #         joined = '\n'.join(dataset)
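For reference, a minimal sketch of how `writer` pairs with the module-level lists defined at the top of this file (the output directory is a placeholder):

    import os

    output_dir = "results"  # placeholder
    os.makedirs(output_dir, exist_ok=True)
    # zip() in writer pairs each dataset with its name, e.g. robots -> robots.txt
    writer(datasets, dataset_names, output_dir)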