Skip to content

Commit ef0fe96

Browse files
committed
remove dependency: async
1 parent 06a405f commit ef0fe96

6 files changed

Lines changed: 48 additions & 65 deletions

File tree

bin/pdf2json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
#!/usr/bin/env node
22

3-
'use strict';
4-
5-
var P2JCMD = require('../lib/p2jcmd');
3+
const P2JCMD = require('../lib/p2jcmd');
64
new P2JCMD().start();

lib/p2jcmd.js

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
21
const nodeUtil = require("util"),
32
fs = require("fs"),
43
path = require("path"),
5-
async = require("async"),
64
{ParserStream, StringifyStream} = require("./parserstream"),
75
pkInfo = require("../package.json"),
86
PDFParser = require("../pdfparser");
@@ -96,35 +94,36 @@ class PDFProcessor {
9694
this.pdfParser.getAllFieldsTypesStream().pipe(new StringifyStream()).pipe(outputStream);
9795
}
9896

99-
#processAdditionalStreams(outputTasks, callback) {
100-
if (PROCESS_FIELDS_CONTENT) {//needs to generate fields.json file
101-
outputTasks.push(cbFunc => this.#generateFieldsTypesStream(cbFunc));
102-
}
103-
if (PROCESS_RAW_TEXT_CONTENT) {//needs to generate content.txt file
104-
outputTasks.push(cbFunc => this.#generateRawTextContentStream(cbFunc));
105-
}
106-
if (PROCESS_MERGE_BROKEN_TEXT_BLOCKS) {//needs to generate json file with merged broken text blocks
107-
outputTasks.push(cbFunc => this.#generateMergedTextBlocksStream(cbFunc));
108-
}
109-
110-
if (outputTasks.length > 0) {
111-
async.series(outputTasks, (err, results) => {//additional streams process complete
112-
if (err) {
113-
this.curCLI.addStatusMsg(err, `[+]=> ${err}`);
114-
} else {
115-
results.forEach( r => this.curCLI.addStatusMsg(null, `[+]=> ${r}`));
116-
}
117-
this.#continue(callback);
118-
});
119-
}
120-
else {
121-
this.#continue(callback);
122-
}
97+
#processAdditionalStreams(callback) {
98+
const outputTasks = [];
99+
if (PROCESS_FIELDS_CONTENT) {//needs to generate fields.json file
100+
outputTasks.push(cbFunc => this.#generateFieldsTypesStream(cbFunc));
101+
}
102+
if (PROCESS_RAW_TEXT_CONTENT) {//needs to generate content.txt file
103+
outputTasks.push(cbFunc => this.#generateRawTextContentStream(cbFunc));
104+
}
105+
if (PROCESS_MERGE_BROKEN_TEXT_BLOCKS) {//needs to generate json file with merged broken text blocks
106+
outputTasks.push(cbFunc => this.#generateMergedTextBlocksStream(cbFunc));
107+
}
108+
109+
let taskId = 0;
110+
function sequenceTask() {
111+
if (taskId < outputTasks.length) {
112+
outputTasks[taskId]((err, ret) => {
113+
this.curCLI.addStatusMsg(err, `[+]=> ${ret}`);
114+
taskId++;
115+
sequenceTask.call(this);
116+
});
117+
}
118+
else
119+
this.#continue(callback);
120+
}
121+
sequenceTask.call(this);
123122
}
124123

125124
#onPrimarySuccess(callback) {
126125
this.curCLI.addResultCount();
127-
this.#processAdditionalStreams([], callback);
126+
this.#processAdditionalStreams(callback);
128127
}
129128

130129
#onPrimaryError(err, callback) {

package-lock.json

Lines changed: 1 addition & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,11 @@
4747
"pdf2json": "./bin/pdf2json"
4848
},
4949
"dependencies": {
50-
"async": "^3.2.1",
5150
"@xmldom/xmldom": "^0.7.5",
5251
"yargs": "^17.2.1"
5352
},
5453
"devDependencies": {},
5554
"bundledDependencies": [
56-
"async",
5755
"@xmldom/xmldom",
5856
"yargs"
5957
],

pdfparser.js

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
const fs = require("fs"),
2+
{ readFile } = require("fs/promises"),
23
{EventEmitter} = require("events"),
34
nodeUtil = require("util"),
4-
async = require("async"),
55
PDFJS = require("./lib/pdf"),
66
{ParserStream} = require("./lib/parserstream"),
77
{kColors, kFontFaces, kFontStyles} = require("./lib/pdfconst");
@@ -21,7 +21,6 @@ class PDFParser extends EventEmitter { // inherit from event emitter
2121
#password = "";
2222

2323
#context = null; // service context object, only used in Web Service project; null in command line
24-
#fq = null; //async queue for reading files
2524

2625
#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started
2726
#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache
@@ -37,9 +36,6 @@ class PDFParser extends EventEmitter { // inherit from event emitter
3736
// private
3837
// service context object, only used in Web Service project; null in command line
3938
this.#context = context;
40-
this.#fq = async.queue( (task, callback) => {
41-
fs.readFile(task.path, callback);
42-
}, 1);
4339

4440
this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started
4541
this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache
@@ -104,37 +100,34 @@ class PDFParser extends EventEmitter { // inherit from event emitter
104100
return false;
105101
}
106102

107-
#processPDFContent(err, data) {
108-
nodeUtil.p2jinfo("Load PDF file status:" + (!!err ? "Error!" : "Success!") );
109-
if (err) {
110-
this.#data = null;
111-
this.emit("pdfParser_dataError", err);
112-
}
113-
else {
114-
PDFParser.#binBuffer[this.binBufferKey] = data;
115-
this.#startParsingPDF();
116-
}
117-
};
118-
119103
//public APIs
120104
createParserStream() {
121105
return new ParserStream(this, {objectMode: true, bufferSize: 64 * 1024});
122106
}
123107

124-
loadPDF(pdfFilePath, verbosity) {
108+
async loadPDF(pdfFilePath, verbosity) {
125109
nodeUtil.verbosity(verbosity || 0);
126110
nodeUtil.p2jinfo("about to load PDF file " + pdfFilePath);
127111

128112
this.#pdfFilePath = pdfFilePath;
129-
this.#pdfFileMTime = fs.statSync(pdfFilePath).mtimeMs;
130-
if (this.#processFieldInfoXML) {
131-
this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath);
132-
}
133-
134-
if (this.#processBinaryCache())
135-
return;
136113

137-
this.#fq.push({path: pdfFilePath}, this.#processPDFContent.bind(this));
114+
try {
115+
this.#pdfFileMTime = fs.statSync(pdfFilePath).mtimeMs;
116+
if (this.#processFieldInfoXML) {
117+
this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath);
118+
}
119+
120+
if (this.#processBinaryCache())
121+
return;
122+
123+
PDFParser.#binBuffer[this.binBufferKey] = await readFile(pdfFilePath);
124+
nodeUtil.p2jinfo(`Load OK: ${pdfFilePath}`);
125+
this.#startParsingPDF();
126+
}
127+
catch(err) {
128+
nodeUtil.p2jerror(`Load Failed: ${pdfFilePath} - ${err}`);
129+
this.emit("pdfParser_dataError", err);
130+
}
138131
}
139132

140133
// Introduce a way to directly process buffers without the need to write it to a temporary file

readme.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -888,7 +888,7 @@ In order to support this auto merging capability, text block objects have an add
888888
* More test coverage, 4 more test scripts added, see _package.json_ for details
889889
* Easier access to dictionaries, including color, font face and font style, see Dictionary reference section for details
890890
* Refactor to ES6 class for major entry modules
891-
* Dependency is removed: lodash.
891+
* Dependencies removed: lodash, async
892892
* Upgrade to Node v14.18.0 LTS
893893
894894
### Install on Ubuntu

0 commit comments

Comments
 (0)