diff options
| author | Henry Jameson <me@hjkos.com> | 2019-11-14 22:40:20 +0200 |
|---|---|---|
| committer | Henry Jameson <me@hjkos.com> | 2019-11-14 22:40:20 +0200 |
| commit | bd2a682b83743311645241fe644e853e1a359b67 (patch) | |
| tree | 18a7b1f65d059f819da1e30adee2351029feb18b | |
| parent | 51ea295704c52b1f9a922868aedf264e53a5ec92 (diff) | |
tests + updates
| -rw-r--r-- | src/services/tiny_post_html_processor/tiny_post_html_processor.service.js | 19 | ||||
| -rw-r--r-- | test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js | 96 |
2 files changed, 111 insertions, 4 deletions
diff --git a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js index b96c1ccf..de6f20ef 100644 --- a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js +++ b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js @@ -2,6 +2,8 @@ * This is a tiny purpose-built HTML parser/processor. This basically detects any type of visual newline and * allows it to be processed, useful for greentexting, mostly * + * known issue: doesn't handle CDATA so nested CDATA might not work well + * * @param {Object} input - input data * @param {(string) => string} processor - function that will be called on every line * @return {string} processed html @@ -22,11 +24,15 @@ export const processHtml = (html, processor) => { } const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer - buffer += processor(textBuffer) + if (textBuffer.trim().length > 0) { + buffer += processor(textBuffer) + } else { + buffer += textBuffer + } textBuffer = '' } - const handleBr = (tag) => { // handles single newlines/linebreaks + const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing flush() buffer += tag } @@ -59,10 +65,12 @@ export const processHtml = (html, processor) => { if (handledTags.has(tagName)) { if (tagName === 'br') { handleBr(tagFull) - } - if (openCloseTags.has(tagFull)) { + } else if (openCloseTags.has(tagName)) { if (tagFull[1] === '/') { handleClose(tagFull) + } else if (tagFull[tagFull.length - 2] === '/') { + // self-closing + handleBr(tagFull) } else { handleOpen(tagFull) } @@ -76,6 +84,9 @@ export const processHtml = (html, processor) => { textBuffer += char } } + if (tagBuffer) { + textBuffer += tagBuffer + } flush() diff --git a/test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js b/test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js new file mode 100644 index 00000000..f301429d --- /dev/null +++ b/test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js @@ -0,0 +1,96 @@ +import { processHtml } from 'src/services/tiny_post_html_processor/tiny_post_html_processor.service.js' + +describe('TinyPostHTMLProcessor', () => { + describe('with processor that keeps original line should not make any changes to HTML when', () => { + const processorKeep = (line) => line + it('fed with regular HTML with newlines', () => { + const inputOutput = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with possibly broken HTML with invalid tags/composition', () => { + const inputOutput = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with very broken HTML with broken composition', () => { + const inputOutput = '</p> lmao what </div> whats going on <div> wha <p>' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with sorta valid HTML but tags aren\'t closed', () => { + const inputOutput = 'just leaving a <div> hanging' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with not really HTML at this point... tags that aren\'t finished', () => { + const inputOutput = 'do you expect me to finish this <div class=' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with dubiously valid HTML (p within p and also div inside p)', () => { + const inputOutput = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with maybe valid HTML? self-closing divs and ps', () => { + const inputOutput = 'a <div class="what"/> what now <p aria-label="wtf"/> ?' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with valid XHTML containing a CDATA', () => { + const inputOutput = 'Yes, it is me, <![CDATA[DIO]]>' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + }) + describe('with processor that replaces lines with word "_" should match expected line when', () => { + const processorReplace = (line) => '_' + it('fed with regular HTML with newlines', () => { + const input = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>' + const output = '_<br/>_<p class="lol">_</p>_\n_<p >_<br>_</p> <br>\n<br/>' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with possibly broken HTML with invalid tags/composition', () => { + const input = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>' + const output = '_' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with very broken HTML with broken composition', () => { + const input = '</p> lmao what </div> whats going on <div> wha <p>' + const output = '</p>_</div>_<div>_<p>' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with sorta valid HTML but tags aren\'t closed', () => { + const input = 'just leaving a <div> hanging' + const output = '_<div>_' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with not really HTML at this point... tags that aren\'t finished', () => { + const input = 'do you expect me to finish this <div class=' + const output = '_' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with dubiously valid HTML (p within p and also div inside p)', () => { + const input = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>' + const output = '_<p>_\n_<p>_</p>_<br/><div>_</div></p>' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with maybe valid HTML? self-closing divs and ps', () => { + const input = 'a <div class="what"/> what now <p aria-label="wtf"/> ?' + const output = '_<div class="what"/>_<p aria-label="wtf"/>_' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with valid XHTML containing a CDATA', () => { + const input = 'Yes, it is me, <![CDATA[DIO]]>' + const output = '_' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + }) +}) |
