Skip to content

Commit b96581e

Browse files
authored
Merge pull request #102 from NHSDigital/ryma2fhir-patch-7
Update errorChecker.py
2 parents d5f287e + 90aa1a2 commit b96581e

2 files changed

Lines changed: 48 additions & 20 deletions

File tree

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,56 @@
11
# -*- coding: utf-8 -*-
22
"""
3-
This script checks webpages for any error messages
3+
This script checks webpages for any error messages and console logs (not including stack trace)
44
"""
55

66
from linkScraper import *
7+
import re
78

9+
def classErrors(warnings, soup):
    '''Append every <div class="error"> element found in the page to
    warnings and return the (mutated) list.

    warnings -- list accumulating warning items across checks
    soup     -- parsed page (BeautifulSoup-like; only .find_all is used)
    '''
    # extend() already handles an empty result, so the original
    # truthiness guard + per-item append loop collapses to one call
    warnings.extend(soup.find_all('div', {'class': "error"}))
    return warnings
816

9-
''' Interates over ListOfLinks returning any pages that have errors '''
10-
def FindErrors(url):
11-
websites = ListOfLinks(url)
12-
for e in websites:
13-
url_check = 'https://simplifier.net'+ e
14-
data_check = requests.get(url_check).text
15-
soup_check = BeautifulSoup(data_check,"html.parser")
16-
error = soup_check.find_all('div',{'class':"error"})
17-
if error:
18-
print(url_check)
19-
for err in error:
20-
print(err)
21-
print()
22-
print("Check Complete")
23-
17+
def consoleLog(warnings, soup):
    '''Collect console.log warnings (excluding stack traces) from every
    <script> tag in the page and return the warnings list.

    Expects scripts shaped like:
        console.log(<error type>, <error var>)
        <error var> = JSON.stringify(`<text>`)
    Each warning is appended as "<error type> <text>", with ' At'
    rewritten to newline+tab for readable indentation.
    '''
    for script in soup.find_all('script'):
        script_text = script.get_text()
        # hoisted out of the message loop: the line list depends only on
        # the script text, not on the individual console.log match
        lines = script_text.splitlines()
        for msg in re.findall(r'console\.log\((.*?)\)', script_text):
            # "'Error', e" -> ['Error', 'e']
            msg = msg.replace("'", "").replace(' ', '').split(',')
            for line in lines:
                try:
                    # skip stack traces, declarations and the log call itself
                    if 'Stacktrace' not in msg[1] and 'var' not in line and 'console.log' not in line:
                        # the message text lives between backticks; the
                        # original's rsplit("`", 0)[0] was a no-op and is gone
                        warnings.append(msg[0] + " " + line.split("`")[1].replace(' At', '\n\tAt'))
                except IndexError:
                    # msg has no second part, or the line has no backtick
                    pass
    return warnings
2433

25-
FindErrors(data)
34+
def printWarnings(warnings, url):
    '''Print the page URL followed by each collected warning, one per block.'''
    print(url)
    for warning in warnings:
        print("\t", warning, "\n")
39+
40+
''' Iterates over ListOfLinks returning any pages that have errors '''
41+
def getSoup(url):
    '''Download url and return its HTML parsed as a BeautifulSoup tree.'''
    page_html = requests.get(url).text
    return BeautifulSoup(page_html, "html.parser")
45+
46+
47+
# NOTE(review): `data` is not defined in this file — it presumably arrives
# via `from linkScraper import *`; confirm it is the guide index page text.
websites = ListOfLinks(data)
for suffix in websites:
    page_url = 'https://simplifier.net' + suffix
    soup = getSoup(page_url)
    warnings = classErrors([], soup)
    warnings = consoleLog(warnings, soup)
    if warnings:
        printWarnings(warnings, page_url)


print("Check Complete")

IGPageContentValidator/linkScraper.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,10 @@ def RequestData(url):
1919
def ListOfLinks(url):
    '''Return the unique '/guide...' hrefs linked from the page at url.

    Fetches the page via RequestData and keeps only string hrefs that
    start with '/guide'. Deduplication uses dict.fromkeys so the result
    preserves first-seen order (list(set(...)) gave an arbitrary,
    run-to-run unstable order).
    '''
    soup = RequestData(url)
    guide_links = [
        site
        for site in (link.get('href') for link in soup.find_all('a'))
        # get('href') can return None (or a list) for some tags; skip those
        if isinstance(site, str) and site.startswith('/guide')
    ]
    return list(dict.fromkeys(guide_links))

0 commit comments

Comments
 (0)