11const fs = require ( "fs" ) ,
2+ { readFile } = require ( "fs/promises" ) ,
23 { EventEmitter} = require ( "events" ) ,
34 nodeUtil = require ( "util" ) ,
4- async = require ( "async" ) ,
55 PDFJS = require ( "./lib/pdf" ) ,
66 { ParserStream} = require ( "./lib/parserstream" ) ,
77 { kColors, kFontFaces, kFontStyles} = require ( "./lib/pdfconst" ) ;
@@ -21,7 +21,6 @@ class PDFParser extends EventEmitter { // inherit from event emitter
2121 #password = "" ;
2222
2323 #context = null ; // service context object, only used in Web Service project; null in command line
24- #fq = null ; //async queue for reading files
2524
2625 #pdfFilePath = null ; //current PDF file to load and parse, null means loading/parsing not started
2726 #pdfFileMTime = null ; // last time the current pdf was modified, used to recognize changes and ignore cache
@@ -37,9 +36,6 @@ class PDFParser extends EventEmitter { // inherit from event emitter
3736 // private
3837 // service context object, only used in Web Service project; null in command line
3938 this . #context = context ;
40- this . #fq = async . queue ( ( task , callback ) => {
41- fs . readFile ( task . path , callback ) ;
42- } , 1 ) ;
4339
4440 this . #pdfFilePath = null ; //current PDF file to load and parse, null means loading/parsing not started
4541 this . #pdfFileMTime = null ; // last time the current pdf was modified, used to recognize changes and ignore cache
@@ -104,37 +100,34 @@ class PDFParser extends EventEmitter { // inherit from event emitter
104100 return false ;
105101 }
106102
107- #processPDFContent( err , data ) {
108- nodeUtil . p2jinfo ( "Load PDF file status:" + ( ! ! err ? "Error!" : "Success!" ) ) ;
109- if ( err ) {
110- this . #data = null ;
111- this . emit ( "pdfParser_dataError" , err ) ;
112- }
113- else {
114- PDFParser . #binBuffer[ this . binBufferKey ] = data ;
115- this . #startParsingPDF( ) ;
116- }
117- } ;
118-
119103 //public APIs
120104 createParserStream ( ) {
121105 return new ParserStream ( this , { objectMode : true , bufferSize : 64 * 1024 } ) ;
122106 }
123107
124- loadPDF ( pdfFilePath , verbosity ) {
108+ async loadPDF ( pdfFilePath , verbosity ) {
125109 nodeUtil . verbosity ( verbosity || 0 ) ;
126110 nodeUtil . p2jinfo ( "about to load PDF file " + pdfFilePath ) ;
127111
128112 this . #pdfFilePath = pdfFilePath ;
129- this . #pdfFileMTime = fs . statSync ( pdfFilePath ) . mtimeMs ;
130- if ( this . #processFieldInfoXML) {
131- this . #PDFJS. tryLoadFieldInfoXML ( pdfFilePath ) ;
132- }
133-
134- if ( this . #processBinaryCache( ) )
135- return ;
136113
137- this . #fq. push ( { path : pdfFilePath } , this . #processPDFContent. bind ( this ) ) ;
114+ try {
115+ this . #pdfFileMTime = fs . statSync ( pdfFilePath ) . mtimeMs ;
116+ if ( this . #processFieldInfoXML) {
117+ this . #PDFJS. tryLoadFieldInfoXML ( pdfFilePath ) ;
118+ }
119+
120+ if ( this . #processBinaryCache( ) )
121+ return ;
122+
123+ PDFParser . #binBuffer[ this . binBufferKey ] = await readFile ( pdfFilePath ) ;
124+ nodeUtil . p2jinfo ( `Load OK: ${ pdfFilePath } ` ) ;
125+ this . #startParsingPDF( ) ;
126+ }
127+ catch ( err ) {
128+ nodeUtil . p2jerror ( `Load Failed: ${ pdfFilePath } - ${ err } ` ) ;
129+ this . emit ( "pdfParser_dataError" , err ) ;
130+ }
138131 }
139132
140133 // Introduce a way to directly process buffers without the need to write it to a temporary file
0 commit comments