From 50dc9df8a44d408dd83ae4b17c407fa36c85cf8e Mon Sep 17 00:00:00 2001 From: Henry Jameson Date: Thu, 14 Nov 2019 00:18:14 +0200 Subject: adds greentext, also small fixes --- .../tiny_post_html_processor.service.js | 84 ++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 src/services/tiny_post_html_processor/tiny_post_html_processor.service.js (limited to 'src/services/tiny_post_html_processor') diff --git a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js new file mode 100644 index 00000000..c9ff81e1 --- /dev/null +++ b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js @@ -0,0 +1,84 @@ +/** + * This is a tiny purpose-built HTML parser/processor. This basically detects any type of visual newline and + * allows it to be processed, useful for greentexting, mostly + * + * @param {Object} input - input data + * @param {(string) => string} processor - function that will be called on every line + * @return {string} processed html + */ +export const processHtml = (html, processor) => { + const handledTags = new Set(['p', 'br', 'div']) + const openCloseTags = new Set(['p', 'div']) + const tagRegex = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi + + let buffer = '' // Current output buffer + const level = [] // How deep we are in tags and which tags were there + let textBuffer = '' // Current line content + let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag + + // Extracts tagname from tag, i.e. => span + const getTagName = (tag) => { + // eslint-disable-next-line no-unused-vars + const result = tagRegex.exec(tag) + return result && (result[1] || result[2]) + } + + const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer + buffer += processor(textBuffer) + textBuffer = '' + } + + const handleBr = (tag) => { // handles single newlines/linebreaks + flush() + buffer += tag + } + + const handleOpen = (tag) => { // handles opening tags + flush() + buffer += tag + level.push(tag) + } + + const handleClose = (tag) => { // handles closing tags + flush() + buffer += tag + if (level[level.length - 1] === tag) { + level.pop() + } + } + + for (let i = 0; i < html.length; i++) { + const char = html[i] + if (char === '<' && tagBuffer !== null) { + tagBuffer = char + } else if (char !== '>' && tagBuffer !== null) { + tagBuffer += char + } else if (char === '>' && tagBuffer !== null) { + tagBuffer += char + const tagName = getTagName(tagBuffer) + if (handledTags.has(tagName)) { + if (tagName === 'br') { + handleBr(tagBuffer) + } + if (openCloseTags.has(tagBuffer)) { + if (tagBuffer[1] === '/') { + handleClose(tagBuffer) + } else { + handleOpen(tagBuffer) + } + } + } else { + textBuffer += tagBuffer + } + tagBuffer = null + } else if (char === '\n') { + handleBr(char) + } else { + textBuffer += char + } + } + + flush() + + return buffer +} -- cgit v1.2.3-70-g09d2 From 692ee0e95a852b1f803b7ae92d65cbf4f3ce3445 Mon Sep 17 00:00:00 2001 From: Henry Jameson Date: Thu, 14 Nov 2019 00:41:14 +0200 Subject: Fix regex, tag detector condition --- src/components/status/status.js | 2 +- .../tiny_post_html_processor.service.js | 25 +++++++++++----------- 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'src/services/tiny_post_html_processor') diff --git a/src/components/status/status.js b/src/components/status/status.js index 6dbb2199..416aa36a 100644 --- a/src/components/status/status.js +++ b/src/components/status/status.js @@ -43,7 +43,7 @@ const Status = { showingTall: this.inConversation && this.focused, showingLongSubject: false, error: null, - // Initial state + // not as computed because it sets the initial state which will be changed later expandingSubject: !this.$store.getters.mergedConfig.collapseMessageWithSubject, } }, diff --git a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js index c9ff81e1..b96c1ccf 100644 --- a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js +++ b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js @@ -9,17 +9,15 @@ export const processHtml = (html, processor) => { const handledTags = new Set(['p', 'br', 'div']) const openCloseTags = new Set(['p', 'div']) - const tagRegex = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi let buffer = '' // Current output buffer const level = [] // How deep we are in tags and which tags were there let textBuffer = '' // Current line content let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag - // Extracts tagname from tag, i.e. => span + // Extracts tag name from tag, i.e. => span const getTagName = (tag) => { - // eslint-disable-next-line no-unused-vars - const result = tagRegex.exec(tag) + const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag) return result && (result[1] || result[2]) } @@ -49,28 +47,29 @@ export const processHtml = (html, processor) => { for (let i = 0; i < html.length; i++) { const char = html[i] - if (char === '<' && tagBuffer !== null) { + if (char === '<' && tagBuffer === null) { tagBuffer = char } else if (char !== '>' && tagBuffer !== null) { tagBuffer += char } else if (char === '>' && tagBuffer !== null) { tagBuffer += char - const tagName = getTagName(tagBuffer) + const tagFull = tagBuffer + tagBuffer = null + const tagName = getTagName(tagFull) if (handledTags.has(tagName)) { if (tagName === 'br') { - handleBr(tagBuffer) + handleBr(tagFull) } - if (openCloseTags.has(tagBuffer)) { - if (tagBuffer[1] === '/') { - handleClose(tagBuffer) + if (openCloseTags.has(tagFull)) { + if (tagFull[1] === '/') { + handleClose(tagFull) } else { - handleOpen(tagBuffer) + handleOpen(tagFull) } } } else { - textBuffer += tagBuffer + textBuffer += tagFull } - tagBuffer = null } else if (char === '\n') { handleBr(char) } else { -- cgit v1.2.3-70-g09d2 From bd2a682b83743311645241fe644e853e1a359b67 Mon Sep 17 00:00:00 2001 From: Henry Jameson Date: Thu, 14 Nov 2019 22:40:20 +0200 Subject: tests + updates --- .../tiny_post_html_processor.service.js | 19 ++++- .../tiny_post_html_processor.spec.js | 96 ++++++++++++++++++++++ 2 files changed, 111 insertions(+), 4 deletions(-) create mode 100644 test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js (limited to 'src/services/tiny_post_html_processor') diff --git a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js index b96c1ccf..de6f20ef 100644 --- a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js +++ b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js @@ -2,6 +2,8 @@ * This is a tiny purpose-built HTML parser/processor. This basically detects any type of visual newline and * allows it to be processed, useful for greentexting, mostly * + * known issue: doesn't handle CDATA so nested CDATA might not work well + * * @param {Object} input - input data * @param {(string) => string} processor - function that will be called on every line * @return {string} processed html @@ -22,11 +24,15 @@ export const processHtml = (html, processor) => { } const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer - buffer += processor(textBuffer) + if (textBuffer.trim().length > 0) { + buffer += processor(textBuffer) + } else { + buffer += textBuffer + } textBuffer = '' } - const handleBr = (tag) => { // handles single newlines/linebreaks + const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing flush() buffer += tag } @@ -59,10 +65,12 @@ export const processHtml = (html, processor) => { if (handledTags.has(tagName)) { if (tagName === 'br') { handleBr(tagFull) - } - if (openCloseTags.has(tagFull)) { + } else if (openCloseTags.has(tagName)) { if (tagFull[1] === '/') { handleClose(tagFull) + } else if (tagFull[tagFull.length - 2] === '/') { + // self-closing + handleBr(tagFull) } else { handleOpen(tagFull) } @@ -76,6 +84,9 @@ export const processHtml = (html, processor) => { textBuffer += char } } + if (tagBuffer) { + textBuffer += tagBuffer + } flush() diff --git a/test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js b/test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js new file mode 100644 index 00000000..f301429d --- /dev/null +++ b/test/unit/specs/services/tiny_post_html_processor/tiny_post_html_processor.spec.js @@ -0,0 +1,96 @@ +import { processHtml } from 'src/services/tiny_post_html_processor/tiny_post_html_processor.service.js' + +describe('TinyPostHTMLProcessor', () => { + describe('with processor that keeps original line should not make any changes to HTML when', () => { + const processorKeep = (line) => line + it('fed with regular HTML with newlines', () => { + const inputOutput = '1
2

3 4

5 \n 6

7
8


\n
' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with possibly broken HTML with invalid tags/composition', () => { + const inputOutput = ' ayylmao ' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with very broken HTML with broken composition', () => { + const inputOutput = '

lmao what whats going on
wha

' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with sorta valid HTML but tags aren\'t closed', () => { + const inputOutput = 'just leaving a

hanging' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with not really HTML at this point... tags that aren\'t finished', () => { + const inputOutput = 'do you expect me to finish this
{ + const inputOutput = 'look ma

p \nwithin

p!

and a
div!

' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with maybe valid HTML? self-closing divs and ps', () => { + const inputOutput = 'a
what now

?' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + + it('fed with valid XHTML containing a CDATA', () => { + const inputOutput = 'Yes, it is me, ' + expect(processHtml(inputOutput, processorKeep)).to.eql(inputOutput) + }) + }) + describe('with processor that replaces lines with word "_" should match expected line when', () => { + const processorReplace = (line) => '_' + it('fed with regular HTML with newlines', () => { + const input = '1
2

3 4

5 \n 6

7
8


\n
' + const output = '_
_

_

_\n_

_
_


\n
' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with possibly broken HTML with invalid tags/composition', () => { + const input = ' ayylmao ' + const output = '_' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with very broken HTML with broken composition', () => { + const input = '

lmao what
whats going on
wha

' + const output = '

_
_
_

' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with sorta valid HTML but tags aren\'t closed', () => { + const input = 'just leaving a

hanging' + const output = '_
_' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with not really HTML at this point... tags that aren\'t finished', () => { + const input = 'do you expect me to finish this
{ + const input = 'look ma

p \nwithin

p!

and a
div!

' + const output = '_

_\n_

_

_
_

' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with maybe valid HTML? self-closing divs and ps', () => { + const input = 'a
what now

?' + const output = '_

_

_' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + + it('fed with valid XHTML containing a CDATA', () => { + const input = 'Yes, it is me, ' + const output = '_' + expect(processHtml(input, processorReplace)).to.eql(output) + }) + }) +}) -- cgit v1.2.3-70-g09d2