Skip to content

Commit bf84ae4

Browse files
authored
Merge pull request #83 from TaroballzChen/master
Solve the SSL certificate verification failure issue
2 parents 3fa6cfb + 8c5ac08 commit bf84ae4

3 files changed

Lines changed: 20 additions & 13 deletions

File tree

ghdb_scraper.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Standard Python libraries.
44
import argparse
55
import json
6+
import urllib3
67

78
# Third party Python libraries.
89
from bs4 import BeautifulSoup
@@ -11,7 +12,7 @@
1112
# Custom Python libraries.
1213

1314

14-
__version__ = "1.0.0"
15+
__version__ = "1.1.0"
1516

1617
"""
1718
Dork dictionary example:
@@ -63,7 +64,21 @@ def retrieve_google_dorks(
6364
}
6465

6566
print(f"[+] Requesting URL: {url}")
66-
response = requests.get(url, headers=headers, timeout=10)
67+
try:
68+
response = requests.get(
69+
url,
70+
headers=headers,
71+
timeout=10,
72+
)
73+
except requests.exceptions.SSLError:
74+
requests.packages.urllib3.disable_warnings()
75+
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
76+
response = requests.get(
77+
url,
78+
headers=headers,
79+
timeout=10,
80+
verify=False,
81+
)
6782

6883
if response.status_code != 200:
6984
print(f"[-] Error retrieving google dorks from: {url}")
@@ -84,7 +99,6 @@ def retrieve_google_dorks(
8499

85100
# Loop through dorks, collecting and organizing them.
86101
for dork in json_dorks:
87-
88102
# Extract dork from <a href> using BeautifulSoup.
89103
# "<a href=\"/ghdb/5052\">inurl:_cpanel/forgotpwd</a>"
90104
soup = BeautifulSoup(dork["url_title"], "html.parser")
@@ -99,7 +113,6 @@ def retrieve_google_dorks(
99113

100114
# Create an empty list for each category if it doesn't already exist.
101115
if numeric_category_id not in category_dict:
102-
103116
category_dict[numeric_category_id] = {"category_name": category_name, "dorks": []}
104117

105118
# Some of the URL titles have trailing tabs, use replace() to remove it in place. The strip() method cannot be
@@ -110,12 +123,10 @@ def retrieve_google_dorks(
110123

111124
# If requested, break up dorks into individual files based off category.
112125
if save_individual_categories_to_files:
113-
114126
# Sort category_dict based off the numeric keys.
115127
category_dict = dict(sorted(category_dict.items()))
116128

117129
for key, value in category_dict.items():
118-
119130
# Provide some category metrics.
120131
print(f"[*] Category {key} ('{value['category_name']}') has {len(value['dorks'])} dorks")
121132

@@ -163,7 +174,6 @@ def retrieve_google_dorks(
163174

164175

165176
if __name__ == "__main__":
166-
167177
categories = {
168178
1: "Footholds",
169179
2: "File Containing Usernames",

pagodo.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,6 @@ def go(self):
154154
}
155155

156156
for dork in self.google_dorks:
157-
158157
self.pagodo_results_dict["dorks"][dork] = {
159158
"urls_size": 0,
160159
"urls": [],
@@ -237,7 +236,6 @@ def go(self):
237236

238237
# Google dork results found.
239238
if dork_urls_list:
240-
241239
self.log.info(f"Results: {dork_urls_list_size} URLs found for Google dork: {dork}")
242240

243241
dork_urls_list_as_string = "\n".join(dork_urls_list)
@@ -307,7 +305,6 @@ def _split_lines(self, text, width):
307305

308306

309307
if __name__ == "__main__":
310-
311308
parser = argparse.ArgumentParser(
312309
description=f"pagodo - Passive Google Dork v{__version__}",
313310
formatter_class=SmartFormatter,

requirements.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
beautifulsoup4==4.11.1
2-
requests==2.28.2
3-
yagooglesearch==1.6.1
1+
beautifulsoup4==4.12.2
2+
requests==2.31.0
3+
yagooglesearch==1.7.0

0 commit comments

Comments (0)