Skip to content

Commit bef175d

Browse files
committed
Format cleanup of ghdb_scraper.py
1 parent cf301f9 commit bef175d

1 file changed

Lines changed: 12 additions & 8 deletions

File tree

ghdb_scraper.py

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
# Standard Python libraries.
44
import argparse
55
import json
6+
import urllib3
67

78
# Third party Python libraries.
89
from bs4 import BeautifulSoup
@@ -64,12 +65,20 @@ def retrieve_google_dorks(
6465

6566
print(f"[+] Requesting URL: {url}")
6667
try:
67-
response = requests.get(url, headers=headers, timeout=10)
68+
response = requests.get(
69+
url,
70+
headers=headers,
71+
timeout=10,
72+
)
6873
except requests.exceptions.SSLError:
69-
import urllib3
7074
requests.packages.urllib3.disable_warnings()
7175
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
72-
response = requests.get(url, headers=headers, timeout=10, verify=False)
76+
response = requests.get(
77+
url,
78+
headers=headers,
79+
timeout=10,
80+
verify=False,
81+
)
7382

7483
if response.status_code != 200:
7584
print(f"[-] Error retrieving google dorks from: {url}")
@@ -90,7 +99,6 @@ def retrieve_google_dorks(
9099

91100
# Loop through dorks, collecting and organizing them.
92101
for dork in json_dorks:
93-
94102
# Extract dork from <a href> using BeautifulSoup.
95103
# "<a href=\"/ghdb/5052\">inurl:_cpanel/forgotpwd</a>"
96104
soup = BeautifulSoup(dork["url_title"], "html.parser")
@@ -105,7 +113,6 @@ def retrieve_google_dorks(
105113

106114
# Create an empty list for each category if it doesn't already exist.
107115
if numeric_category_id not in category_dict:
108-
109116
category_dict[numeric_category_id] = {"category_name": category_name, "dorks": []}
110117

111118
# Some of the URL titles have trailing tabs, use replace() to remove it in place. The strip() method cannot be
@@ -116,12 +123,10 @@ def retrieve_google_dorks(
116123

117124
# If requested, break up dorks into individual files based off category.
118125
if save_individual_categories_to_files:
119-
120126
# Sort category_dict based off the numeric keys.
121127
category_dict = dict(sorted(category_dict.items()))
122128

123129
for key, value in category_dict.items():
124-
125130
# Provide some category metrics.
126131
print(f"[*] Category {key} ('{value['category_name']}') has {len(value['dorks'])} dorks")
127132

@@ -169,7 +174,6 @@ def retrieve_google_dorks(
169174

170175

171176
if __name__ == "__main__":
172-
173177
categories = {
174178
1: "Footholds",
175179
2: "File Containing Usernames",

0 commit comments

Comments
 (0)