Skip to content

Commit b96581e

Browse files
authored
Merge pull request #102 from NHSDigital/ryma2fhir-patch-7
Update errorChecker.py
2 parents d5f287e + 90aa1a2 commit b96581e

2 files changed

Lines changed: 48 additions & 20 deletions

File tree

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,56 @@
11
# -*- coding: utf-8 -*-
22
"""
3-
This script checks webpages for any error messages
3+
This script checks webpages for any error messages and console logs (not including stack trace)
44
"""
55

66
from linkScraper import *
7+
import re
78

9+
def classErrors(warnings, soup):
    '''Append every <div class="error"> element found in the page to
    warnings and return the (mutated) list.

    warnings -- list accumulating warning items across checks
    soup     -- parsed page (BeautifulSoup-like; only .find_all is used)
    '''
    # extend() already handles an empty result, so the original
    # truthiness guard + per-item append loop collapses to one call
    warnings.extend(soup.find_all('div', {'class': "error"}))
    return warnings
816

9-
''' Interates over ListOfLinks returning any pages that have errors '''
10-
def FindErrors(url):
11-
websites = ListOfLinks(url)
12-
for e in websites:
13-
url_check = 'https://simplifier.net'+ e
14-
data_check = requests.get(url_check).text
15-
soup_check = BeautifulSoup(data_check,"html.parser")
16-
error = soup_check.find_all('div',{'class':"error"})
17-
if error:
18-
print(url_check)
19-
for err in error:
20-
print(err)
21-
print()
22-
print("Check Complete")
23-
17+
def consoleLog(warnings, soup):
    '''Collect console.log warnings (excluding stack traces) from every
    <script> tag in the page and return the warnings list.

    Expects scripts shaped like:
        console.log(<error type>, <error var>)
        <error var> = JSON.stringify(`<text>`)
    Each warning is appended as "<error type> <text>", with ' At'
    rewritten to newline+tab for readable indentation.
    '''
    for script in soup.find_all('script'):
        script_text = script.get_text()
        # hoisted out of the message loop: the line list depends only on
        # the script text, not on the individual console.log match
        lines = script_text.splitlines()
        for msg in re.findall(r'console\.log\((.*?)\)', script_text):
            # "'Error', e" -> ['Error', 'e']
            msg = msg.replace("'", "").replace(' ', '').split(',')
            for line in lines:
                try:
                    # skip stack traces, declarations and the log call itself
                    if 'Stacktrace' not in msg[1] and 'var' not in line and 'console.log' not in line:
                        # the message text lives between backticks; the
                        # original's rsplit("`", 0)[0] was a no-op and is gone
                        warnings.append(msg[0] + " " + line.split("`")[1].replace(' At', '\n\tAt'))
                except IndexError:
                    # msg has no second part, or the line has no backtick
                    pass
    return warnings
2433

25-
FindErrors(data)
34+
def printWarnings(warnings, url):
    '''Print the page URL followed by each collected warning, one per block.'''
    print(url)
    for warning in warnings:
        print("\t", warning, "\n")
39+
40+
''' Iterates over ListOfLinks returning any pages that have errors '''
41+
def getSoup(url):
    '''Download url and return its HTML parsed as a BeautifulSoup tree.'''
    page_html = requests.get(url).text
    return BeautifulSoup(page_html, "html.parser")
45+
46+
47+
# NOTE(review): `data` is not defined in this file — it presumably arrives
# via `from linkScraper import *`; confirm it is the guide index page text.
websites = ListOfLinks(data)
for suffix in websites:
    page_url = 'https://simplifier.net' + suffix
    soup = getSoup(page_url)
    warnings = classErrors([], soup)
    warnings = consoleLog(warnings, soup)
    if warnings:
        printWarnings(warnings, page_url)


print("Check Complete")

IGPageContentValidator/linkScraper.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,10 @@ def RequestData(url):
1919
def ListOfLinks(url):
    '''Return the unique '/guide...' hrefs linked from the page at url.

    Fetches the page via RequestData and keeps only string hrefs that
    start with '/guide'. Deduplication uses dict.fromkeys so the result
    preserves first-seen order (list(set(...)) gave an arbitrary,
    run-to-run unstable order).
    '''
    soup = RequestData(url)
    guide_links = [
        site
        for site in (link.get('href') for link in soup.find_all('a'))
        # get('href') can return None (or a list) for some tags; skip those
        if isinstance(site, str) and site.startswith('/guide')
    ]
    return list(dict.fromkeys(guide_links))

0 commit comments

Comments
 (0)