diff --git a/.github/workflows/websiteChecker.yml b/.github/workflows/websiteChecker.yml index 6b9ba00..8f7736d 100644 --- a/.github/workflows/websiteChecker.yml +++ b/.github/workflows/websiteChecker.yml @@ -42,7 +42,7 @@ jobs: curl ${{ github.event.inputs.websiteurl }} -s -f -o /dev/null - name: Execute Link Check run: > - linkchecker -r 2 --check-extern --no-status -f + linkchecker -r 2 --check-extern --no-status --ignore-url='[@]' --ignore-url='^(?!.*http).*html.*$' -f ./IGPageContentValidator/linkcheckerrc ${{ github.event.inputs.websiteurl }} || test $? = 1; job3: name: spell checker @@ -63,4 +63,4 @@ jobs: run: INPUT_STORE=${{ github.event.inputs.websiteurl }} python ./IGPageContentValidator/relToAbsLinks.py - name: Execute Spell Check - run: cat OutputLinks.txt | while read p; do wget -nv -O - $p | aspell list -H --camel-case --lang en_GB --add-html-skip=nocheck -p ./IGPageContentValidator/.aspell.en.pws |sort| uniq -c; echo -e '\n'; done; + run: cat OutputLinks.txt | while read -r p; do wget -nv -O - "$p" | aspell list -H --ignore 2 --camel-case --lang en_GB --add-html-skip=nocheck -p ./IGPageContentValidator/.aspell.en.pws |sort| uniq -c; echo -e '\n'; done; diff --git a/IGPageContentValidator/.aspell.en.pws b/IGPageContentValidator/.aspell.en.pws index 8c47619..adda034 100644 --- a/IGPageContentValidator/.aspell.en.pws +++ b/IGPageContentValidator/.aspell.en.pws @@ -1,224 +1,774 @@ -personal_ws-1.1 en 192 +personal_ws-1.1 en 992 +ACF +ACS ACVPU -admitter -AllergyIntolEnd +ACW +accused +ADOPTF +ADOPTM +ADOPTP +ADRT +ADT +AJV AMB AMPP -apim -asserter +ASID +ATND +AUS +AllergyIntolEnd +Alwoodley +AoMRC +AstraZeneca AstraZenecaVaccine Atenolol -ATND -authorizing -backport -Betamethasone +Autogenous +BCG +bcp +BEWLEY BMI -boolean +BROINLAW BUPA -cadaveric +Betamethasone +Bisoprolol +Brightside +Brucellosis +Burk +CAF CALLBCK -cardinalities -Cardinality -cardinality +CAMHS +CELVAPAN +CHLDADOPT +CHLDFOST +CHLDINLAW +CIB +CIMI +CLD +CMET +CMETs +CNE +COUSN +CP +CQRS 
+CRE +CRI +CRS +CSO +CTV +CWE +Calmette Cardinality CardiologySJUH Careplan -caresettingtype +Center +Cerner +Chewable Citalopram -CLD -codeable +Clostridium CodingSCT CodingSCTDescDisplay CodingSCTDescId +Collodion +Comirnaty +ConsultantSandraGose +Covid +Cramer +Cubital +Cytogenetics +Cytomegalovirus +DAU +DAUADOPT +DAUC +DAUFOST +DAUINLAW +DCB +DEV +DHCW +DOMPART +DSPT +DSTU +DT +DTS +DeGrasse +Dermatopathology +Desc +DescId +Dispersible +DoctorPaulRastall +ECL +EDI +EDQM +EEA +EHIC +EHR +ELLAND +EMER +EMG +EMIS +ENT +EP +EPR +EPS +EPSIssueCode +ERS +ESC +EUD +EUO +Electrocardiac +Electrocardiographic +Endocervical +Endodontic +Endodontics +Endosinusial +Endotracheopulmonary +Enterer +Epilesional +Episodicity +ExtensionUKCoreCodingSCTDescDisplay +ExtensionUKCoreMedicationStatementLastIssueDate +Extraamniotic +FAMMEMB +FGM +FHIR +FK +FLD +FMG +FMM +FMRSPS +FNP +FP +FPA +FRND +FSN +FTH +FTHFOST +FTHINLAW +FTWIN +FTWINBRO +FTWINSIS +Fearnville +Firely +FiveWs +GDS +GESTM +GGRFTH +GGRMTH +GGRPRN +GIM +GLH +GMC +GMP +GPES +GPET +GPhC +GPs +GRFTH +GRMTH +GRNDCHILD +GRNDDAU +GRNDSON +GRPRN +GTIN +GUID +Gastro +Gastroenteral +Gastrostomy +Gingival +Gose +Goserelin +Guerin +HBRO +HCP +HCPC +HCPs +HH +HL +HNC +HSCI +HSCIC +HSCN +HSIB +HSIS +HUSB +Haemodiafiltration +Haleon +Hb +Hematology +Hepatobiliary +Holter +IAO +IAPT +ICS +ICSs +IDT +IG +IGARD +IHN +IHS +IHTSDO +IM +INHOUSE +INLAW +INTEROPen +IPS +ITK +ITKResponseCode +ITWIN +ITWINBRO +ITWINSIS +IVIg +Immunopathology +Infarctus +Infective +Informatician +Instill +Intermountain +Interoperable +Intraamniotic +Intraarterial +Intraarticular +Intrabursal +Intracameral +Intracardiac +Intracavernous +Intracerebroventricular +Intracervical +Intracoronary +Intradermal +Intradiscal +Intraepidermal +Intraglandular +Intralesional +Intralymphatic +Intraocular +Intraosseous +Intraperitoneal +Intrapleural +Intrasternal +Intratendinous +Intrathecal +Intratumoral +Intraventricular +Intravesical +Intravitreal +JIRA +JSON +JWT 
+Javascript +Jejunostomy +Kanban +LDAP +LENVILLE +LOINC +LRS +Leishmaniasis +MAUNT +MBE +MCOUSN +MGGRFTH +MGGRMTH +MGGRPRN +MGP +MGRFTH +MGRMTH +MGRPRN +MH +MHRA +MHRT +MHS +MIF +MIM +MMR +MPM +MRN +MTH +MTHFOST +MTHINLAW +MUNCLE +MVC +Makaton +MedianCubitalVeinExample +Mellitues +Metformin +Minicom +Morphologically +Muco +Mycobacteriology +NACS +NAVU +NBO +NBOR +NBRO +NCHILD +NEMS +NFTH +NFTHF +NHS +NHSD +NHSE +NHSX +NIENEPH +NMC +NMTH +NMTHF +NONAC +NPM +NPRN +NRLS +NSIB +NSIS +NTIN +Nasogastric +Nasojejunal +NegHandlNoKnownAllergies +Neuropathology +Neuropsychiatry +OBSENC +OID +OOP +OUTLAB +OVM +Ontoserver +Opioid +Organization +Organizational +Orodispersible +Oromucosal +Orthogeriatric +Orthoptics +Orthotics +Otic +Otitis +Overtown +PAF +PANDEMRIX +PAUNT +PCOUSN +PDS +PGD +PGGRFTH +PGGRMTH +PGGRPRN +PGRFTH +PGRMTH +PGRPRN +PHS +PID +PPRF +PRENC +PRN +PRNFOST +PRNINLAW +PRSB +PTL +PTV +PUNCLE +PV +Panadol +Parasitology +PathologyAndLaboratoryMedicineObservables +PaulRastall +Percutaneous +Periarticular +Peribulbar +Perinatology +Perineural +Periosseous +Peritendinous +Peritumoral +Plc +Pneumovax +Podiatric +ProfilesandExtensions +Prosthodontic +Prosthodontics +Psychogeriatric +Quadrivalent +RBAC +RDW +RESTful +RMIM +ROA +Radionuclide +Radiopharmaceutical +Rafferty +Ramipril +Rastall +Refset +Retrobulbar +Rheumatology +Rotavirus +Ryver +SAML +SCAL +SCR +SCT +SDS +SIB +SIBINLAW +SIGOTHR +SIMPLIFIER +SIRO +SISINLAW +SJUH +SLT +SME +SMR +SMS +SMSP +SMTP +SNOMED +SOA +SONADOPT +SONC +SONFOST +SONINLAW +SPRF +SPS +SSB +SSP +STPBRO +STPCHLD +STPDAU +STPFTH +STPMTH +STPPRN +STPSIB +STPSIS +STPSON +STPs +SUS +Sandmoor +SandraGose +Seperate +Septrin +Simplifier +Simvastatin +Subconjunctival +Submucosal +Subretinal +Surview +Syncytial +TBC +TF +TMS +TRUD +TTO +TWINBRO +TWINSIS +Taitor +Telecare +Telehealth +Telehealthcare +Telemedicine +Telepractice +Templated +Timolol +TimololVTM +Timoptol +TimoptolEyeDrops +Transdermal +Translingual +Transmucosal +Typhi +UCUM +UI +UML 
+UMS +UNC +UNK +UPRN +URI +URP +UTF +UUID +Ultrasonography +Uptitrate +Urticarial +Uv +VMP +VMPP +VMPs +VNA +VR +VTM +VUsgQ +VaccinationProcedureCovid +Vaccinia +Valueset +Varicella +Vaxzevria +Venipuncture +Vestibular +WALKIN +WGS +Wellbeing +XMI +XSLT +YYYY +accused +acellular +admitter +alphaherpesvirus +anticoagulation +anticytomegalovirus +antirheumatic +apim +artifact +asserter +attester +audiovestibular +authorized +authorizing +backport +backporting +bacteremia +bariatric +bfe +bfi +boolean +bordetella +br +buccal +cadaveric +cardinalities +cardinality +cardioplegia +cardiothoracic +caresettingtype +childrens +codeable +codec +colorectal +colposcopy compositional computable conformant -ConsultantSandraGose +contemporarily coronavirus -Covid +creatinine +crs curation dataset datasets datatypes -DeGrasse deployable -DescId designee -DHCW +df +diag dicom +dietician discharger -DoctorPaulRastall -DSTU -ECL -EMER -Episodicity -episodicity +dm +drp +eGFR ePMA -EPS -EPSIssueCode -ERS -ESC +eXtensible +edu +ele +elec +electrophysiology +enterer +enterica +episodicity excipient extensibility -eXtensible +ferritin fhir -FHIR fhirukcorer -Firely -FiveWs -FLD +foetal foetus formulary +fri fulfill fulfiller +fulfills +gastroenteral +gastroenterology +gastroscopy generalizes genomic germline -GMC -Goserelin gp -GPhC -GTIN -GUID +grplab +haemodialysis +haemofiltration +haemophilus +haemorrhagic hardcoded -HCPC -HCPs -HH -HL +hepatology +histocompatibility +homoeopathy hospitalization +http ietf -IG -IHTSDO img +imm immunization +immunizations +immunizing +immunogenetics +immunotherapy implementability implementers incrementing -Informatician +influenzae informaticians +inlaw +inreach interdependencies -INTEROPen -Interoperable interoperable +interventional +intra +intranasal intraoperatively +intravesical intubation -IPS -ITK -ITKResponseCode -JIRA +iontophoresis jpeg jpg json -JSON -Kanban +lang lexically lifecycle -LOINC loinc -MBE +lt +lymphoedema +lyophilisate 
+maxillofacial +medicationdispense meds +melanocytic +mellitus +meningococcal +meningococcus messageheader -Metformin +metamodel +micrograms milliliter millimeter millimoles -Minicom +minimize +mmm mmol -MRN +mon +muco multidose -NegHandlNoKnownAllergies -NHS +musculoskeletal +myocarde +naevus +namespace +nd +nebuliser +neighbor +nephrology +neurodisability +neurophysiology nhs -NHS -NHSE -NHSX -NONAC -NPM -NTIN -OBSENC +nilknown +nn +notasked +notstarted +observables ods -OID ok -Ontoserver +oligosaccharide +onboarded +onboarding onwards +ordinated organization -Organization +organizations +organizing +orthoptics +orthotic +orthotics +otolaryngology pagelink +papilloma +papillomavirus parenteral -PaulRastall -PDS -PGD +partum +pathologic +pathophysiologic +patientspecified +pdf +pentavalent phenotypic +pillule +Pillules +plasmid png -PPRF +polyribosylribitol +polysaccharide pre -PRENC prescriber -ProfilesandExtensions -PRSB -Rastall +prescribers +primarycare +prioirity +proficiencies +psychogeriatric +pulmonology +quadrivalent +radiopharmaceutical recombinant -Refset refsets -RESTful +reli rfc +rheumatology +roadmap roadmaps -Ryver -SandraGose +rotavirus schematron sctdescid -SDS -Septrin -Simplifier -SIMPLIFIER -SJUH -SLT +semail +serovar smartcard -SNOMED snomedCT +specialized specialties specialty sphygmomanometer -SPRF stu +subtype superset +syncytial synonymously syntaxes -Taitor -TBC -telecom +tbody +td telecom televideo +tetani textphone -Timolol -TimololVTM -Timoptol -TimoptolEyeDrops +th +thalassaemia +thu +toxoid +tpcHBlZCB triaged -TTO -UCUM -UI +tricyclic +tue +uid ukcore ukcorelogos +un unmapped -URI uri url +urls +urogynaecology utf -UUID uuid -UTF -VaccinationProcedureCovid +valent validator -Valueset -Vaxzevria -VMP -VMPP -VR -VTM +valueset +varicella +virion wardened +wbWVudCB webpages +wgs xds xml xmlns -YYYY +yyyy diff --git a/IGPageContentValidator/README.md b/IGPageContentValidator/README.md index cc7eb35..0d66946 100644 --- 
a/IGPageContentValidator/README.md +++ b/IGPageContentValidator/README.md @@ -28,7 +28,11 @@ This is set up to only output the errors for each individual page. Uses the conf - `-r 2` - Sets the recursion level at 2. All links within the webpage within websites.txt and any internal webapges linked from this are scraped for links and checked. - `--check-extern` - check external links are valid. - `--no-status` - do not show status apart from errors. -- `-f linkcheckerrc` - use config file - setup so that it will check any pages <50mb in size. +- `-f linkcheckerrc` - use config file + - setup so that it will check any pages <50mb in size. + - ignores any link that contains png or @ + - ignores any http-redirected warnings + - ignores any links that do not contain `http` but do contain `html`. This is a workaround to ignore internal markdown links, of which HL7 has many that only work on their website and not within the asset renders. ### LinkCheckerError: File size too large If the reponse is `[url-error-getting-content] could not get content:` `LinkCheckerError: File size too large` the page size is larger than what the maximum has been set. To fix this increase the `maxfilesizedownload` (line 177) within the `linkcheckerrc` file accordingly. @@ -40,6 +44,7 @@ Uses [Aspell](https://www.gnu.org/software/wget/manual/wget.html#Option-Syntax). #### wget - `-nv` - Turn off verbose without being completely quiet (use ‘-q’ for that), which means that error messages and basic information still get printed. - `-O` - The documents will not be written to the appropriate files, but all will be concatenated together and written to file. If ‘-’ is used as file, documents will be printed to standard output, disabling link conversion. 
+- `--ignore 2` - (Aspell option) Ignores any words containing 2 or fewer letters #### Aspell - `-H` - Sets mode to HTML diff --git a/IGPageContentValidator/linkcheckerrc b/IGPageContentValidator/linkcheckerrc index 280b939..176d6e3 100644 --- a/IGPageContentValidator/linkcheckerrc +++ b/IGPageContentValidator/linkcheckerrc @@ -191,7 +191,12 @@ maxrequestspersecond=4 ##################### filtering options ########################## [filtering] -#ignore= +ignore= + \.png +# \@ + [@] +# ^(?!.*http).*html.*$ + ^(?!.*http).*html.*$ # ignore everything with 'lconline' in the URL name # lconline # and ignore everything with 'bookmark' in the URL name @@ -208,7 +213,7 @@ maxrequestspersecond=4 # recognized warnings). Add a comma-separated list of warnings here # that prevent a valid URL from being logged. Note that the warning # will be logged for invalid URLs. Example: -#ignorewarnings=url-unicode-domain +ignorewarnings=http-redirected # Regular expression to add more URLs recognized as internal links. # Default is that URLs given on the command line are internal. #internlinks=^http://www\.example\.net/