diff --git a/IGPageContentValidator/errorChecker.py b/IGPageContentValidator/errorChecker.py index 60c3deb..10b9ee4 100644 --- a/IGPageContentValidator/errorChecker.py +++ b/IGPageContentValidator/errorChecker.py @@ -1,25 +1,56 @@ # -*- coding: utf-8 -*- """ -This script checks webpages for any error messages +This script checks webpages for any error messages and console logs (not including stack trace) """ from linkScraper import * +import re +def classErrors(warnings, soup): + '''Appends every div with class "error" found in soup to warnings and returns warnings''' + class_errors = soup.find_all('div',{'class':"error"}) + if class_errors: + for err in class_errors: + warnings.append(err) + return warnings -''' Interates over ListOfLinks returning any pages that have errors ''' -def FindErrors(url): - websites = ListOfLinks(url) - for e in websites: - url_check = 'https://simplifier.net'+ e - data_check = requests.get(url_check).text - soup_check = BeautifulSoup(data_check,"html.parser") - error = soup_check.find_all('div',{'class':"error"}) - if error: - print(url_check) - for err in error: - print(err) - print() - print("Check Complete") - +def consoleLog(warnings,soup): + ''' Finds all console.log calls inside the script tags of soup, then finds the text associated with each. Expects 'console.log(,)' & ' = JSON.stringify(``). 
Returns warnings with the matching messages appended; entries whose second console.log argument contains 'Stacktrace' are skipped. ''' + script_tags = soup.find_all('script') + for script in script_tags: + script_text = script.get_text() + log_messages = re.findall(r'console\.log\((.*?)\)', script_text) + for msg in log_messages: + msg = (msg.replace("'", "").replace(' ', '').split(',')) + lines = script_text.splitlines() + for line in lines: + try: + if 'Stacktrace' not in msg[1] and 'var' not in line and 'console.log' not in line: + warnings.append(msg[0]+" "+line.split("`")[1].rsplit("`", 0)[0].replace(' At','\n\tAt')) + except IndexError: + pass + return(warnings) -FindErrors(data) +def printWarnings(warnings, url): + '''Prints url, then every item in warnings on its own tab-prefixed line''' + print(url) + for x in warnings: + print("\t",x,"\n") + +''' Iterates over ListOfLinks returning any pages that have errors ''' +def getSoup(url): + data = requests.get(url).text + soup = BeautifulSoup(data,"html.parser") + return soup + + +websites = ListOfLinks(data) +for suffix in websites: + warnings = [] + soup = getSoup('https://simplifier.net'+ suffix) + warnings = classErrors(warnings, soup) + warnings = consoleLog(warnings,soup) + if warnings: + printWarnings(warnings, 'https://simplifier.net'+ suffix) + +print("Check Complete") diff --git a/IGPageContentValidator/linkScraper.py b/IGPageContentValidator/linkScraper.py index a797602..a912b7c 100644 --- a/IGPageContentValidator/linkScraper.py +++ b/IGPageContentValidator/linkScraper.py @@ -19,13 +19,10 @@ def RequestData(url): def ListOfLinks(url): soup = RequestData(url) websites = [] - print("webpages to check") for link in soup.find_all('a'): site = link.get('href') if isinstance(site, str) and site[0:6]=='/guide': - print(site) websites.append(site) - print('\n\n') list_set = set(websites) unique_websites = list(list_set) return unique_websites