diff options
| author | Henry Jameson <me@hjkos.com> | 2022-01-24 19:12:17 +0200 |
|---|---|---|
| committer | Henry Jameson <me@hjkos.com> | 2022-01-24 19:12:17 +0200 |
| commit | 9ea0f10abb195799842baf2f4b8711d1744b37bb (patch) | |
| tree | 2c64119cc7c6bc989e65189712ce81cbbb40c91a /test/unit/specs/services/html_converter | |
| parent | a96a62929d723c3676174dfd71c0db4462599a12 (diff) | |
| parent | 182fcca5da9fa284f46f5ca1c8b1790353dec316 (diff) | |
Merge remote-tracking branch 'origin/develop' into settings-and-filtering
* origin/develop: (169 commits)
Improve the user card for deactivated users
Update CHANGELOG.md
Update CHANGELOG.md
Allow canceling a follow request
Simple policy reasons for instance specific policies
entity_normalizer: Escape name when parsing user
Translated using Weblate (Spanish)
Translated using Weblate (Catalan)
Translated using Weblate (Korean)
Translated using Weblate (Japanese (ja_PEDANTIC))
Translated using Weblate (Indonesian)
Translated using Weblate (Esperanto)
Translated using Weblate (Vietnamese)
Translated using Weblate (Italian)
Translated using Weblate (Vietnamese)
Translated using Weblate (Indonesian)
Translated using Weblate (Italian)
Translated using Weblate (Vietnamese)
Translated using Weblate (Indonesian)
Translated using Weblate (Chinese (Simplified))
...
Diffstat (limited to 'test/unit/specs/services/html_converter')
3 files changed, 340 insertions, 0 deletions
diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js new file mode 100644 index 00000000..86bd7e8b --- /dev/null +++ b/test/unit/specs/services/html_converter/html_line_converter.spec.js @@ -0,0 +1,171 @@ +import { convertHtmlToLines } from 'src/services/html_converter/html_line_converter.service.js' + +const greentextHandle = new Set(['p', 'div']) +const mapOnlyText = (processor) => (input) => { + if (input.text && input.level.every(l => greentextHandle.has(l))) { + return processor(input.text) + } else if (input.text) { + return input.text + } else { + return input + } +} + +describe('html_line_converter', () => { + describe('with processor that keeps original line should not make any changes to HTML when', () => { + const processorKeep = (line) => line + it('fed with regular HTML with newlines', () => { + const inputOutput = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with possibly broken HTML with invalid tags/composition', () => { + const inputOutput = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with very broken HTML with broken composition', () => { + const inputOutput = '</p> lmao what </div> whats going on <div> wha <p>' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with sorta valid HTML but tags aren\'t closed', () => { + const inputOutput = 'just leaving a <div> hanging' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with not really HTML at this point... tags that aren\'t finished', () => { + const inputOutput = 'do you expect me to finish this <div class=' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with dubiously valid HTML (p within p and also div inside p)', () => { + const inputOutput = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with maybe valid HTML? self-closing divs and ps', () => { + const inputOutput = 'a <div class="what"/> what now <p aria-label="wtf"/> ?' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with valid XHTML containing a CDATA', () => { + const inputOutput = 'Yes, it is me, <![CDATA[DIO]]>' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + + it('fed with some recognized but not handled elements', () => { + const inputOutput = 'testing images\n\n<img src="benis.png">' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) + }) + describe('with processor that replaces lines with word "_" should match expected line when', () => { + const processorReplace = (line) => '_' + it('fed with regular HTML with newlines', () => { + const input = '1<br/>2<p class="lol">3 4</p> 5 \n 6 <p > 7 <br> 8 </p> <br>\n<br/>' + const output = '_<br/>_<p class="lol">_</p>_\n_<p >_<br>_</p> <br>\n<br/>' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with possibly broken HTML with invalid tags/composition', () => { + const input = '<feeee dwdwddddddw> <i>ayy<b>lm</i>ao</b> </section>' + const output = '_' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with very broken HTML with broken composition', () => { + const input = '</p> lmao what </div> whats going on <div> wha <p>' + const output = '_<div>_<p>' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with sorta valid HTML but tags aren\'t closed', () => { + const input = 'just leaving a <div> hanging' + const output = '_<div>_' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with not really HTML at this point... tags that aren\'t finished', () => { + const input = 'do you expect me to finish this <div class=' + const output = '_' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with dubiously valid HTML (p within p and also div inside p)', () => { + const input = 'look ma <p> p \nwithin <p> p! </p> and a <br/><div>div!</div></p>' + const output = '_<p>_\n_<p>_</p>_<br/><div>_</div></p>' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with maybe valid HTML? (XHTML) self-closing divs and ps', () => { + const input = 'a <div class="what"/> what now <p aria-label="wtf"/> ?' + const output = '_<div class="what"/>_<p aria-label="wtf"/>_' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('fed with valid XHTML containing a CDATA', () => { + const input = 'Yes, it is me, <![CDATA[DIO]]>' + const output = '_' + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + + it('Testing handling ignored blocks', () => { + const input = ` + <pre><code>> rei = "0" + '0' + > rei == 0 + true + > rei == null + false</code></pre><blockquote>That, christian-like JS diagram but it’s evangelion instead.</blockquote> + ` + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(input) + }) + it('Testing handling ignored blocks 2', () => { + const input = ` + <blockquote>An SSL error has happened.</blockquote><p>Shakespeare</p> + ` + const output = ` + <blockquote>An SSL error has happened.</blockquote><p>_</p> + ` + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) + }) +}) diff --git a/test/unit/specs/services/html_converter/html_tree_converter.spec.js b/test/unit/specs/services/html_converter/html_tree_converter.spec.js new file mode 100644 index 00000000..7283021b --- /dev/null +++ b/test/unit/specs/services/html_converter/html_tree_converter.spec.js @@ -0,0 +1,132 @@ +import { convertHtmlToTree } from 'src/services/html_converter/html_tree_converter.service.js' + +describe('html_tree_converter', () => { + describe('convertHtmlToTree', () => { + it('converts html into a tree structure', () => { + const input = '1 <p>2</p> <b>3<img src="a">4</b>5' + expect(convertHtmlToTree(input)).to.eql([ + '1 ', + [ + '<p>', + ['2'], + '</p>' + ], + ' ', + [ + '<b>', + [ + '3', + ['<img src="a">'], + '4' + ], + '</b>' + ], + '5' + ]) + }) + it('converts html to tree while preserving tag formatting', () => { + const input = '1 <p >2</p><b >3<img src="a">4</b>5' + expect(convertHtmlToTree(input)).to.eql([ + '1 ', + [ + '<p >', + ['2'], + '</p>' + ], + [ + '<b >', + [ + '3', + ['<img src="a">'], + '4' + ], + '</b>' + ], + '5' + ]) + }) + it('converts semi-broken html', () => { + const input = '1 <br> 2 <p> 42' + expect(convertHtmlToTree(input)).to.eql([ + '1 ', + ['<br>'], + ' 2 ', + [ + '<p>', + [' 42'] + ] + ]) + }) + it('realistic case 1', () => { + const input = '<p><span class="h-card"><a class="u-url mention" data-user="9wRC6T2ZZiKWJ0vUi8" href="https://cawfee.club/users/benis" rel="ugc">@<span>benis</span></a></span> <span class="h-card"><a class="u-url mention" data-user="194" href="https://shigusegubu.club/users/hj" rel="ugc">@<span>hj</span></a></span> nice</p>' + expect(convertHtmlToTree(input)).to.eql([ + [ + '<p>', + [ + [ + '<span class="h-card">', + [ + [ + '<a class="u-url mention" data-user="9wRC6T2ZZiKWJ0vUi8" href="https://cawfee.club/users/benis" rel="ugc">', + [ + '@', + [ + '<span>', + [ + 'benis' + ], + '</span>' + ] + ], + '</a>' + ] + ], + '</span>' + ], + ' ', + [ + '<span class="h-card">', + [ + [ + '<a class="u-url mention" data-user="194" href="https://shigusegubu.club/users/hj" rel="ugc">', + [ + '@', + [ + '<span>', + [ + 'hj' + ], + '</span>' + ] + ], + '</a>' + ] + ], + '</span>' + ], + ' nice' + ], + '</p>' + ] + ]) + }) + it('realistic case 2', () => { + const inputOutput = 'Country improv: give me a city<br/>Audience: Memphis<br/>Improv troupe: come on, a better one<br/>Audience: el paso' + expect(convertHtmlToTree(inputOutput)).to.eql([ + 'Country improv: give me a city', + [ + '<br/>' + ], + 'Audience: Memphis', + [ + '<br/>' + ], + 'Improv troupe: come on, a better one', + [ + '<br/>' + ], + 'Audience: el paso' + ]) + }) + }) +}) diff --git a/test/unit/specs/services/html_converter/utility.spec.js b/test/unit/specs/services/html_converter/utility.spec.js new file mode 100644 index 00000000..cf6fd99b --- /dev/null +++ b/test/unit/specs/services/html_converter/utility.spec.js @@ -0,0 +1,37 @@ +import { processTextForEmoji, getAttrs } from 'src/services/html_converter/utility.service.js' + +describe('html_converter utility', () => { + describe('processTextForEmoji', () => { + it('processes all emoji in text', () => { + const input = 'Hello from finland! :lol: We have best water! :lmao:' + const emojis = [ + { shortcode: 'lol', src: 'LOL' }, + { shortcode: 'lmao', src: 'LMAO' } + ] + const processor = ({ shortcode, src }) => ({ shortcode, src }) + expect(processTextForEmoji(input, emojis, processor)).to.eql([ + 'Hello from finland! ', + { shortcode: 'lol', src: 'LOL' }, + ' We have best water! ', + { shortcode: 'lmao', src: 'LMAO' } + ]) + }) + it('leaves text as is', () => { + const input = 'Number one: that\'s terror' + const emojis = [] + const processor = ({ shortcode, src }) => ({ shortcode, src }) + expect(processTextForEmoji(input, emojis, processor)).to.eql([ + 'Number one: that\'s terror' + ]) + }) + }) + + describe('getAttrs', () => { + it('extracts arguments from tag', () => { + const input = '<img src="boop" cool ebin=\'true\'>' + const output = { src: 'boop', cool: true, ebin: 'true' } + + expect(getAttrs(input)).to.eql(output) + }) + }) +}) |
