Skip to content

Commit ba86977

Browse files
committed
Fix extraneous newlines (#232)
1 parent 1c72e2d commit ba86977

3 files changed

Lines changed: 81 additions & 12 deletions

File tree

lib/block-text-builder.js

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -135,19 +135,31 @@ class BlockTextBuilder {
135135
}
136136

137137
if (
138-
this.whitespaceProcessor.testContainsWords(str) || // There are words to add;
139-
(str.length && !this._stackItem.stashedLineBreaks) // or at least spaces to take into account.
140-
) {
141-
if (this._stackItem.stashedLineBreaks) {
142-
this._stackItem.inlineTextBuilder.startNewLine(this._stackItem.stashedLineBreaks);
138+
str.length === 0 || // empty string
139+
(
140+
this._stackItem.stashedLineBreaks && // stashed linebreaks make whitespace irrelevant
141+
!this.whitespaceProcessor.testContainsWords(str) // no words to add
142+
)
143+
) { return; }
144+
145+
if (this.options.preserveNewlines) {
146+
const newlinesNumber = this.whitespaceProcessor.countNewlinesNoWords(str);
147+
if (newlinesNumber > 0) {
148+
this._stackItem.inlineTextBuilder.startNewLine(newlinesNumber);
149+
// keep stashedLineBreaks unchanged
150+
return;
143151
}
144-
this.whitespaceProcessor.shrinkWrapAdd(
145-
str,
146-
this._stackItem.inlineTextBuilder,
147-
(this._wordTransformer && !noWordTransform) ? this._getCombinedWordTransformer() : undefined
148-
);
149-
this._stackItem.stashedLineBreaks = 0; // inline text doesn't introduce line breaks
150152
}
153+
154+
if (this._stackItem.stashedLineBreaks) {
155+
this._stackItem.inlineTextBuilder.startNewLine(this._stackItem.stashedLineBreaks);
156+
}
157+
this.whitespaceProcessor.shrinkWrapAdd(
158+
str,
159+
this._stackItem.inlineTextBuilder,
160+
(this._wordTransformer && !noWordTransform) ? this._getCombinedWordTransformer() : undefined
161+
);
162+
this._stackItem.stashedLineBreaks = 0; // inline text doesn't introduce line breaks
151163
}
152164

153165
/**

lib/whitespace-processor.js

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,11 @@ class WhitespaceProcessor {
3333
this.leadingWhitespaceRe = new RegExp(`^[${whitespaceCodes}]`);
3434
this.trailingWhitespaceRe = new RegExp(`[${whitespaceCodes}]$`);
3535
this.allWhitespaceOrEmptyRe = new RegExp(`^[${whitespaceCodes}]*$`);
36+
this.newlineOrNonWhitespaceRe = new RegExp(`(\\n|[^\\n${whitespaceCodes}])`, 'g');
3637

3738
if (options.preserveNewlines) {
3839

39-
const wordOrNewlineRe = new RegExp(`\n|[^\n${whitespaceCodes}]+`, 'gm');
40+
const wordOrNewlineRe = new RegExp(`\\n|[^\\n${whitespaceCodes}]+`, 'gm');
4041

4142
/**
4243
* Shrink whitespaces and wrap text, add to the builder.
@@ -128,6 +129,28 @@ class WhitespaceProcessor {
128129
return !this.allWhitespaceOrEmptyRe.test(text);
129130
}
130131

132+
/**
133+
* Return the number of newlines if there are no words.
134+
*
135+
* If any word is found then return zero regardless of the actual number of newlines.
136+
*
137+
* Here a "word" is any single character matched by `newlineOrNonWhitespaceRe`
* that is not a newline, i.e. any character outside the configured whitespace set.
*
* @param { string } text Input string.
138+
* @returns { number } Count of `\n` characters, or 0 when the text is empty,
*   has no newlines, or contains at least one non-whitespace character.
139+
*/
140+
countNewlinesNoWords (text) {
141+
// The regex is created with the 'g' flag and shared on the instance, so exec() is stateful;
// reset the scan position (the early return below can leave it mid-string).
this.newlineOrNonWhitespaceRe.lastIndex = 0;
142+
let counter = 0;
143+
let match;
144+
// Each exec() call yields the next newline or non-whitespace character, or null at the end.
while ((match = this.newlineOrNonWhitespaceRe.exec(text)) !== null) {
145+
if (match[0] === '\n') {
146+
counter++;
147+
} else {
148+
// A non-whitespace character was found: the text has words, so report zero.
return 0;
149+
}
150+
}
151+
return counter;
152+
}
153+
131154
}
132155

133156
module.exports = { WhitespaceProcessor: WhitespaceProcessor };

test/html-to-text.js

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,40 @@ describe('html-to-text', function () {
182182
expect(convert(html, { preserveNewlines: true })).to.equal(expected);
183183
});
184184

185+
it('should produce equal results regardless of newline position between blocks', function () {
186+
const newlineOutside = '<p>A</p>\n<p>B</p>';
187+
const newlineInside = '<p>A</p><p>\nB</p>';
188+
const r1 = convert(newlineOutside, { preserveNewlines: true });
189+
const r2 = convert(newlineInside, { preserveNewlines: true });
190+
expect(r1).to.equal(r2);
191+
});
192+
193+
it('should produce equal results for preserved newlines and BR tags', function () {
194+
const nlHtml = '<p>A</p>\n<p>B</p><p>\nC</p>';
195+
const brHtml = '<p>A</p><br/><p>B</p><p><br/>C</p>';
196+
const nlResult = convert(nlHtml, { preserveNewlines: true });
197+
const brResult = convert(brHtml);
198+
expect(nlResult).to.equal(brResult);
199+
});
200+
201+
it('should account for trailing/leading linebreaks of adjacent blocks equally', function () {
202+
const html = '<p>A</p>\n<div>B</div>\n<div>C</div>\n<p>D</p>';
203+
const newlineInside = 'A\n\n\nB\n\nC\n\n\nD';
204+
expect(convert(html, { preserveNewlines: true })).to.equal(newlineInside);
205+
});
206+
207+
it('should work with multiple linebreaks and in presence of whitespaces', function () {
208+
const html = '<p>A</p> \n \n <p>B</p>';
209+
const newlineInside = 'A\n\n\n\nB';
210+
expect(convert(html, { preserveNewlines: true })).to.equal(newlineInside);
211+
});
212+
213+
it('should have no special behavior in presence of words among linebreaks', function () {
214+
const html = '<p>A</p> \n B \n <p>C</p>';
215+
const newlineInside = 'A\n\n\nB\n\n\nC';
216+
expect(convert(html, { preserveNewlines: true })).to.equal(newlineInside);
217+
});
218+
185219
});
186220

187221
describe('unicode and html entities', function () {

0 commit comments

Comments
 (0)