about summary refs log tree commit diff
path: root/test/unit/specs/services/html_converter
diff options
context:
space:
mode:
author Henry Jameson <me@hjkos.com> 2021-06-10 18:52:01 +0300
committer Henry Jameson <me@hjkos.com> 2021-06-10 18:52:01 +0300
commit cc00af7a3102034b05ebcd4aa1fd01c6f467184a (patch)
tree fc2d34177416a03359a85567aa6b8c29374c2f07 /test/unit/specs/services/html_converter
parent 0f73e96194fb13e70be0222a7ab718d7894b62c2 (diff)
Hellthread(tm) Certified
Diffstat (limited to 'test/unit/specs/services/html_converter')
-rw-r--r-- test/unit/specs/services/html_converter/html_line_converter.spec.js 130
-rw-r--r-- test/unit/specs/services/html_converter/html_tree_converter.spec.js 166
2 files changed, 296 insertions, 0 deletions
diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js
new file mode 100644
index 00000000..82cb4170
--- /dev/null
+++ b/test/unit/specs/services/html_converter/html_line_converter.spec.js
@@ -0,0 +1,130 @@
+import { convertHtmlToLines } from 'src/services/html_converter/html_line_converter.service.js'
+
+const mapOnlyText = (processor) => (input) => input.text ? processor(input.text) : input // Builds a mapper: an entry with a truthy .text is replaced by processor(text); any other entry is returned unchanged
+
+describe('TinyPostHTMLProcessor', () => {
+ describe('with processor that keeps original line should not make any changes to HTML when', () => { // Round-trip group: with an identity processor the re-joined output must equal the input
+ const processorKeep = (line) => line // identity processor: hands back the text it was given
+ it('fed with regular HTML with newlines', () => {
+ const inputOutput = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>' // one string serves as both input and expected output
+ const result = convertHtmlToLines(inputOutput)
+ const comparableResult = result.map(mapOnlyText(processorKeep)).join('') // run the processor over text entries, then concatenate all entries back into one string
+ expect(comparableResult).to.eql(inputOutput)
+ })
+
+ it('fed with possibly broken HTML with invalid tags/composition', () => {
+ const inputOutput = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>'
+ const result = convertHtmlToLines(inputOutput)
+ const comparableResult = result.map(mapOnlyText(processorKeep)).join('')
+ expect(comparableResult).to.eql(inputOutput)
+ })
+
+ it('fed with very broken HTML with broken composition', () => {
+ const inputOutput = '</p> lmao what </div> whats going on <div> wha <p>'
+ const result = convertHtmlToLines(inputOutput)
+ const comparableResult = result.map(mapOnlyText(processorKeep)).join('')
+ expect(comparableResult).to.eql(inputOutput)
+ })
+
+ it('fed with sorta valid HTML but tags aren\'t closed', () => {
+ const inputOutput = 'just leaving a <div> hanging'
+ const result = convertHtmlToLines(inputOutput)
+ const comparableResult = result.map(mapOnlyText(processorKeep)).join('')
+ expect(comparableResult).to.eql(inputOutput)
+ })
+
+ it('fed with not really HTML at this point... tags that aren\'t finished', () => {
+ const inputOutput = 'do you expect me to finish this <div class=' // input ends in the middle of an opening tag
+ const result = convertHtmlToLines(inputOutput)
+ const comparableResult = result.map(mapOnlyText(processorKeep)).join('')
+ expect(comparableResult).to.eql(inputOutput)
+ })
+
+ it('fed with dubiously valid HTML (p within p and also div inside p)', () => {
+ const inputOutput = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>'
+ const result = convertHtmlToLines(inputOutput)
+ const comparableResult = result.map(mapOnlyText(processorKeep)).join('')
+ expect(comparableResult).to.eql(inputOutput)
+ })
+
+ it('fed with maybe valid HTML? self-closing divs and ps', () => {
+ const inputOutput = 'a <div class="what"/> what now <p aria-label="wtf"/> ?'
+ const result = convertHtmlToLines(inputOutput)
+ const comparableResult = result.map(mapOnlyText(processorKeep)).join('')
+ expect(comparableResult).to.eql(inputOutput)
+ })
+
+ it('fed with valid XHTML containing a CDATA', () => {
+ const inputOutput = 'Yes, it is me, <![CDATA[DIO]]>'
+ const result = convertHtmlToLines(inputOutput)
+ const comparableResult = result.map(mapOnlyText(processorKeep)).join('')
+ expect(comparableResult).to.eql(inputOutput)
+ })
+ })
+ describe('with processor that replaces lines with word "_" should match expected line when', () => { // Replacement group: each expected string shows which parts of the input count as text and which are kept verbatim
+ const processorReplace = (line) => '_' // ignores its argument; every processed text entry becomes a single underscore
+ it('fed with regular HTML with newlines', () => {
+ const input = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>'
+ const output = '_<br/>_<p class="lol">_</p>_\n_<p >_<br>_</p> <br>\n<br/>' // the tags and both newlines survive as-is; the trailing space before the last br is also expected unchanged
+ const result = convertHtmlToLines(input)
+ const comparableResult = result.map(mapOnlyText(processorReplace)).join('') // replace text entries, then concatenate all entries into one string
+ expect(comparableResult).to.eql(output)
+ })
+
+ it('fed with possibly broken HTML with invalid tags/composition', () => {
+ const input = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>'
+ const output = '_' // the whole input is expected to collapse to one underscore
+ const result = convertHtmlToLines(input)
+ const comparableResult = result.map(mapOnlyText(processorReplace)).join('')
+ expect(comparableResult).to.eql(output)
+ })
+
+ it('fed with very broken HTML with broken composition', () => {
+ const input = '</p> lmao what </div> whats going on <div> wha <p>'
+ const output = '</p>_</div>_<div>_<p>'
+ const result = convertHtmlToLines(input)
+ const comparableResult = result.map(mapOnlyText(processorReplace)).join('')
+ expect(comparableResult).to.eql(output)
+ })
+
+ it('fed with sorta valid HTML but tags aren\'t closed', () => {
+ const input = 'just leaving a <div> hanging'
+ const output = '_<div>_'
+ const result = convertHtmlToLines(input)
+ const comparableResult = result.map(mapOnlyText(processorReplace)).join('')
+ expect(comparableResult).to.eql(output)
+ })
+
+ it('fed with not really HTML at this point... tags that aren\'t finished', () => {
+ const input = 'do you expect me to finish this <div class='
+ const output = '_' // the unfinished tag is expected to be swallowed into the single replaced entry
+ const result = convertHtmlToLines(input)
+ const comparableResult = result.map(mapOnlyText(processorReplace)).join('')
+ expect(comparableResult).to.eql(output)
+ })
+
+ it('fed with dubiously valid HTML (p within p and also div inside p)', () => {
+ const input = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>'
+ const output = '_<p>_\n_<p>_</p>_<br/><div>_</div></p>'
+ const result = convertHtmlToLines(input)
+ const comparableResult = result.map(mapOnlyText(processorReplace)).join('')
+ expect(comparableResult).to.eql(output)
+ })
+
+ it('fed with maybe valid HTML? self-closing divs and ps', () => {
+ const input = 'a <div class="what"/> what now <p aria-label="wtf"/> ?'
+ const output = '_<div class="what"/>_<p aria-label="wtf"/>_'
+ const result = convertHtmlToLines(input)
+ const comparableResult = result.map(mapOnlyText(processorReplace)).join('')
+ expect(comparableResult).to.eql(output)
+ })
+
+ it('fed with valid XHTML containing a CDATA', () => {
+ const input = 'Yes, it is me, <![CDATA[DIO]]>'
+ const output = '_' // the CDATA section is expected to be replaced together with the surrounding text
+ const result = convertHtmlToLines(input)
+ const comparableResult = result.map(mapOnlyText(processorReplace)).join('')
+ expect(comparableResult).to.eql(output)
+ })
+ })
+})
diff --git a/test/unit/specs/services/html_converter/html_tree_converter.spec.js b/test/unit/specs/services/html_converter/html_tree_converter.spec.js
new file mode 100644
index 00000000..a54745c3
--- /dev/null
+++ b/test/unit/specs/services/html_converter/html_tree_converter.spec.js
@@ -0,0 +1,166 @@
+import { convertHtmlToTree, processTextForEmoji, getAttrs } from 'src/services/html_converter/html_tree_converter.service.js'
+
+describe('MiniHtmlConverter', () => {
+ describe('convertHtmlToTree', () => { // Fixtures below pin the expected tree shape: text is a plain string, an element is an array of opening tag, children array and closing tag
+ it('converts html into a tree structure', () => {
+ const input = '1 <p>2</p> <b>3<img src="a">4</b>5'
+ expect(convertHtmlToTree(input)).to.eql([
+ '1 ',
+ [
+ '<p>',
+ ['2'],
+ '</p>'
+ ],
+ ' ',
+ [
+ '<b>',
+ [
+ '3',
+ ['<img src="a">'], // img has no content or closing tag here, so it is expected as a one-item array
+ '4'
+ ],
+ '</b>'
+ ],
+ '5'
+ ])
+ })
+ it('converts html to tree while preserving tag formatting', () => {
+ const input = '1 <p >2</p><b >3<img src="a">4</b>5'
+ expect(convertHtmlToTree(input)).to.eql([
+ '1 ',
+ [
+ '<p >', // whitespace inside the opening tag is expected to be kept verbatim
+ ['2'],
+ '</p>'
+ ],
+ [
+ '<b >',
+ [
+ '3',
+ ['<img src="a">'],
+ '4'
+ ],
+ '</b>'
+ ],
+ '5'
+ ])
+ })
+ it('converts semi-broken html', () => {
+ const input = '1 <br> 2 <p> 42'
+ expect(convertHtmlToTree(input)).to.eql([
+ '1 ',
+ ['<br>'],
+ ' 2 ',
+ [
+ '<p>',
+ [' 42'] // the p is never closed in the input, so no closing-tag entry is expected
+ ]
+ ])
+ })
+ it('realistic case 1', () => { // two nested mention links inside one paragraph, followed by trailing text
+ const input = '<p><span class="h-card"><a class="u-url mention" data-user="9wRC6T2ZZiKWJ0vUi8" href="https://cawfee.club/users/benis" rel="ugc">@<span>benis</span></a></span> <span class="h-card"><a class="u-url mention" data-user="194" href="https://shigusegubu.club/users/hj" rel="ugc">@<span>hj</span></a></span> nice</p>'
+ expect(convertHtmlToTree(input)).to.eql([
+ [
+ '<p>',
+ [
+ [
+ '<span class="h-card">',
+ [
+ [
+ '<a class="u-url mention" data-user="9wRC6T2ZZiKWJ0vUi8" href="https://cawfee.club/users/benis" rel="ugc">',
+ [
+ '@',
+ [
+ '<span>',
+ [
+ 'benis'
+ ],
+ '</span>'
+ ]
+ ],
+ '</a>'
+ ]
+ ],
+ '</span>'
+ ],
+ ' ',
+ [
+ '<span class="h-card">',
+ [
+ [
+ '<a class="u-url mention" data-user="194" href="https://shigusegubu.club/users/hj" rel="ugc">',
+ [
+ '@',
+ [
+ '<span>',
+ [
+ 'hj'
+ ],
+ '</span>'
+ ]
+ ],
+ '</a>'
+ ]
+ ],
+ '</span>'
+ ],
+ ' nice'
+ ],
+ '</p>'
+ ]
+ ])
+ })
+ it('realistic case 2', () => { // plain text lines separated by self-closing br tags
+ const inputOutput = 'Country improv: give me a city<br/>Audience: Memphis<br/>Improv troupe: come on, a better one<br/>Audience: el paso' // used only as input in this test, despite the name
+ expect(convertHtmlToTree(inputOutput)).to.eql([
+ 'Country improv: give me a city',
+ [
+ '<br/>'
+ ],
+ 'Audience: Memphis',
+ [
+ '<br/>'
+ ],
+ 'Improv troupe: come on, a better one',
+ [
+ '<br/>'
+ ],
+ 'Audience: el paso'
+ ])
+ })
+ })
+
+ describe('processTextForEmoji', () => { // Checks that shortcodes wrapped in colons are swapped for the processor result while other text stays as strings
+ it('processes all emoji in text', () => {
+ const input = 'Hello from finland! :lol: We have best water! :lmao:'
+ const emojis = [
+ { shortcode: 'lol', src: 'LOL' },
+ { shortcode: 'lmao', src: 'LMAO' }
+ ]
+ const processor = ({ shortcode, src }) => ({ shortcode, src }) // echoes the two fields it receives, so matches appear as plain objects in the result
+ expect(processTextForEmoji(input, emojis, processor)).to.eql([
+ 'Hello from finland! ',
+ { shortcode: 'lol', src: 'LOL' },
+ ' We have best water! ',
+ { shortcode: 'lmao', src: 'LMAO' } // input ends with the shortcode; no trailing empty string is expected
+ ])
+ })
+ it('leaves text as is', () => {
+ const input = 'Number one: that\'s terror'
+ const emojis = [] // with no known emoji the text is expected back as a single unchanged string
+ const processor = ({ shortcode, src }) => ({ shortcode, src })
+ expect(processTextForEmoji(input, emojis, processor)).to.eql([
+ 'Number one: that\'s terror'
+ ])
+ })
+ })
+
+ describe('getAttrs', () => { // Checks attribute extraction from a single opening-tag string
+ it('extracts arguments from tag', () => {
+ const input = '<img src="boop" cool ebin=\'true\'>' // covers a double-quoted value, a valueless attribute and a single-quoted value
+ const output = { src: 'boop', cool: true, ebin: 'true' } // valueless attribute is expected as boolean true; quoted values stay strings
+
+ expect(getAttrs(input)).to.eql(output)
+ })
+ })
+})