From cc00af7a3102034b05ebcd4aa1fd01c6f467184a Mon Sep 17 00:00:00 2001 From: Henry Jameson Date: Thu, 10 Jun 2021 18:52:01 +0300 Subject: Hellthread(tm) Certified --- .../html_converter/html_line_converter.service.js | 102 ++++++++++++++ .../html_converter/html_tree_converter.service.js | 146 +++++++++++++++++++++ 2 files changed, 248 insertions(+) create mode 100644 src/services/html_converter/html_line_converter.service.js create mode 100644 src/services/html_converter/html_tree_converter.service.js (limited to 'src/services/html_converter') diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js new file mode 100644 index 00000000..80482c9a --- /dev/null +++ b/src/services/html_converter/html_line_converter.service.js @@ -0,0 +1,102 @@ +/** + * This is a tiny purpose-built HTML parser/processor. This basically detects + * any type of visual newline and converts entire HTML into a array structure. + * + * Text nodes are represented as object with single property - text - containing + * the visual line. Intended usage is to process the array with .map() in which + * map function returns a string and resulting array can be converted back to html + * with a .join(''). + * + * Generally this isn't very useful except for when you really need to either + * modify visual lines (greentext i.e. simple quoting) or do something with + * first/last line. + * + * known issue: doesn't handle CDATA so nested CDATA might not work well + * + * @param {Object} input - input data + * @return {(string|{ text: string })[]} processed html in form of a list. + */ +export const convertHtmlToLines = (html) => { + const handledTags = new Set(['p', 'br', 'div']) + const openCloseTags = new Set(['p', 'div']) + + let buffer = [] // Current output buffer + const level = [] // How deep we are in tags and which tags were there + let textBuffer = '' // Current line content + let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag + + // Extracts tag name from tag, i.e. => span + const getTagName = (tag) => { + const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag) + return result && (result[1] || result[2]) + } + + const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer + if (textBuffer.trim().length > 0) { + buffer.push({ text: textBuffer }) + } else { + buffer.push(textBuffer) + } + textBuffer = '' + } + + const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing + flush() + buffer.push(tag) + } + + const handleOpen = (tag) => { // handles opening tags + flush() + buffer.push(tag) + level.push(tag) + } + + const handleClose = (tag) => { // handles closing tags + flush() + buffer.push(tag) + if (level[level.length - 1] === tag) { + level.pop() + } + } + + for (let i = 0; i < html.length; i++) { + const char = html[i] + if (char === '<' && tagBuffer === null) { + tagBuffer = char + } else if (char !== '>' && tagBuffer !== null) { + tagBuffer += char + } else if (char === '>' && tagBuffer !== null) { + tagBuffer += char + const tagFull = tagBuffer + tagBuffer = null + const tagName = getTagName(tagFull) + if (handledTags.has(tagName)) { + if (tagName === 'br') { + handleBr(tagFull) + } else if (openCloseTags.has(tagName)) { + if (tagFull[1] === '/') { + handleClose(tagFull) + } else if (tagFull[tagFull.length - 2] === '/') { + // self-closing + handleBr(tagFull) + } else { + handleOpen(tagFull) + } + } + } else { + textBuffer += tagFull + } + } else if (char === '\n') { + handleBr(char) + } else { + textBuffer += char + } + } + if (tagBuffer) { + textBuffer += tagBuffer + } + + flush() + + return buffer +} diff --git a/src/services/html_converter/html_tree_converter.service.js b/src/services/html_converter/html_tree_converter.service.js new file mode 100644 index 00000000..badd473a --- /dev/null +++ b/src/services/html_converter/html_tree_converter.service.js @@ -0,0 +1,146 @@ +/** + * This is a not-so-tiny purpose-built HTML parser/processor. This parses html + * and converts it into a tree structure representing tag openers/closers and + * children. + * + * Structure follows this pattern: [opener, [...children], closer] except root + * node which is just [...children]. Text nodes can only be within children and + * are represented as strings. + * + * Intended use is to convert HTML structure and then recursively iterate over it + * most likely using a map. Very useful for dynamically rendering html replacing + * tags with JSX elements in a render function. + * + * known issue: doesn't handle CDATA so CDATA might not work well + * known issue: doesn't handle HTML comments + * + * @param {Object} input - input data + * @return {string} processed html + */ +export const convertHtmlToTree = (html) => { + // Elements that are implicitly self-closing + // https://developer.mozilla.org/en-US/docs/Glossary/empty_element + const emptyElements = new Set([ + 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', + 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' + ]) + // TODO For future - also parse HTML5 multi-source components? + + const buffer = [] // Current output buffer + const levels = [['', buffer]] // How deep we are in tags and which tags were there + let textBuffer = '' // Current line content + let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag + + const getCurrentBuffer = () => { + return levels[levels.length - 1][1] + } + + const flushText = () => { // Processes current line buffer, adds it to output buffer and clears line buffer + if (textBuffer === '') return + getCurrentBuffer().push(textBuffer) + textBuffer = '' + } + + const handleSelfClosing = (tag) => { + getCurrentBuffer().push([tag]) + } + + const handleOpen = (tag) => { + const curBuf = getCurrentBuffer() + const newLevel = [tag, []] + levels.push(newLevel) + curBuf.push(newLevel) + } + + const handleClose = (tag) => { + const currentTag = levels[levels.length - 1] + if (getTagName(levels[levels.length - 1][0]) === getTagName(tag)) { + currentTag.push(tag) + levels.pop() + } else { + getCurrentBuffer().push(tag) + } + } + + for (let i = 0; i < html.length; i++) { + const char = html[i] + if (char === '<' && tagBuffer === null) { + flushText() + tagBuffer = char + } else if (char !== '>' && tagBuffer !== null) { + tagBuffer += char + } else if (char === '>' && tagBuffer !== null) { + tagBuffer += char + const tagFull = tagBuffer + tagBuffer = null + const tagName = getTagName(tagFull) + if (tagFull[1] === '/') { + handleClose(tagFull) + } else if (emptyElements.has(tagName) || tagFull[tagFull.length - 2] === '/') { + // self-closing + handleSelfClosing(tagFull) + } else { + handleOpen(tagFull) + } + } else { + textBuffer += char + } + } + if (tagBuffer) { + textBuffer += tagBuffer + } + + flushText() + return buffer +} + +// Extracts tag name from tag, i.e. => span +export const getTagName = (tag) => { + const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag) + return result && (result[1] || result[2]) +} + +export const processTextForEmoji = (text, emojis, processor) => { + const buffer = [] + let textBuffer = '' + for (let i = 0; i < text.length; i++) { + const char = text[i] + if (char === ':') { + const next = text.slice(i + 1) + let found = false + for (let emoji of emojis) { + if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) { + found = emoji + break + } + } + if (found) { + buffer.push(textBuffer) + textBuffer = '' + buffer.push(processor(found)) + i += found.shortcode.length + 1 + } else { + textBuffer += char + } + } else { + textBuffer += char + } + } + if (textBuffer) buffer.push(textBuffer) + return buffer +} + +export const getAttrs = tag => { + const innertag = tag + .substring(1, tag.length - 1) + .replace(new RegExp('^' + getTagName(tag)), '') + .replace(/\/?$/, '') + .trim() + const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi)) + .map(([trash, key, value]) => [key, value]) + .map(([k, v]) => { + if (!v) return [k, true] + return [k, v.substring(1, v.length - 1)] + }) + return Object.fromEntries(attrs) +} -- cgit v1.2.3-70-g09d2 From 5834790d0b38d487e314e8419509d162abbd6a80 Mon Sep 17 00:00:00 2001 From: Henry Jameson Date: Fri, 11 Jun 2021 11:50:05 +0300 Subject: fix #935 --- .../html_converter/html_line_converter.service.js | 13 +++++------ .../html_converter/html_line_converter.spec.js | 25 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) (limited to 'src/services/html_converter') diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js index 80482c9a..d8f5ecb8 100644 --- a/src/services/html_converter/html_line_converter.service.js +++ b/src/services/html_converter/html_line_converter.service.js @@ -17,8 +17,9 @@ * @return {(string|{ text: string })[]} processed html in form of a list. */ export const convertHtmlToLines = (html) => { - const handledTags = new Set(['p', 'br', 'div']) - const openCloseTags = new Set(['p', 'div']) + const ignoredTags = new Set(['code', 'blockquote']) + const handledTags = new Set(['p', 'br', 'div', 'pre', 'code', 'blockquote']) + const openCloseTags = new Set(['p', 'div', 'pre', 'code', 'blockquote']) let buffer = [] // Current output buffer const level = [] // How deep we are in tags and which tags were there @@ -32,7 +33,7 @@ export const convertHtmlToLines = (html) => { } const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer - if (textBuffer.trim().length > 0) { + if (textBuffer.trim().length > 0 && !level.some(l => ignoredTags.has(l))) { buffer.push({ text: textBuffer }) } else { buffer.push(textBuffer) @@ -48,14 +49,14 @@ export const convertHtmlToLines = (html) => { const handleOpen = (tag) => { // handles opening tags flush() buffer.push(tag) - level.push(tag) + level.unshift(getTagName(tag)) } const handleClose = (tag) => { // handles closing tags flush() buffer.push(tag) - if (level[level.length - 1] === tag) { - level.pop() + if (level[0] === getTagName(tag)) { + level.shift() } } diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js index 82cb4170..532ea187 100644 --- a/test/unit/specs/services/html_converter/html_line_converter.spec.js +++ b/test/unit/specs/services/html_converter/html_line_converter.spec.js @@ -126,5 +126,30 @@ describe('TinyPostHTMLProcessor', () => { const comparableResult = result.map(mapOnlyText(processorReplace)).join('') expect(comparableResult).to.eql(output) }) + + it('Testing handling ignored blocks', () => { + const input = ` +
> rei = "0"
+      '0'
+      > rei == 0
+      true
+      > rei == null
+      false
That, christian-like JS diagram but it’s evangelion instead.
+ ` + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(input) + }) + it('Testing handling ignored blocks 2', () => { + const input = ` +
An SSL error has happened.

Shakespeare

+ ` + const output = ` +
An SSL error has happened.

_

+ ` + const result = convertHtmlToLines(input) + const comparableResult = result.map(mapOnlyText(processorReplace)).join('') + expect(comparableResult).to.eql(output) + }) }) }) -- cgit v1.2.3-70-g09d2 From cd4455675024a3dfc8930184114d5f92438d0466 Mon Sep 17 00:00:00 2001 From: Henry Jameson Date: Sat, 12 Jun 2021 19:47:23 +0300 Subject: restructure and tests squash! restructure and tests --- src/components/rich_content/rich_content.jsx | 17 +- src/components/status_body/status_body.vue | 1 - .../html_converter/html_line_converter.service.js | 8 +- .../html_converter/html_tree_converter.service.js | 53 +-- src/services/html_converter/utility.service.js | 73 +++++ test/unit/specs/components/rich_content.spec.js | 357 +++++++++++++++++++++ .../html_converter/html_line_converter.spec.js | 2 +- .../html_converter/html_tree_converter.spec.js | 38 +-- .../specs/services/html_converter/utility.spec.js | 37 +++ 9 files changed, 481 insertions(+), 105 deletions(-) create mode 100644 src/services/html_converter/utility.service.js create mode 100644 test/unit/specs/components/rich_content.spec.js create mode 100644 test/unit/specs/services/html_converter/utility.spec.js (limited to 'src/services/html_converter') diff --git a/src/components/rich_content/rich_content.jsx b/src/components/rich_content/rich_content.jsx index ad77d615..ef15aaeb 100644 --- a/src/components/rich_content/rich_content.jsx +++ b/src/components/rich_content/rich_content.jsx @@ -1,6 +1,7 @@ import Vue from 'vue' import { unescape, flattenDeep } from 'lodash' -import { convertHtmlToTree, getTagName, processTextForEmoji, getAttrs } from 'src/services/html_converter/html_tree_converter.service.js' +import { getTagName, processTextForEmoji, getAttrs } from 'src/services/html_converter/utility.service.js' +import { convertHtmlToTree } from 'src/services/html_converter/html_tree_converter.service.js' import { convertHtmlToLines } from 'src/services/html_converter/html_line_converter.service.js' import StillImage from 'src/components/still-image/still-image.vue' import MentionLink from 'src/components/mention_link/mention_link.vue' @@ -31,18 +32,12 @@ export default Vue.component('RichContent', { required: false, type: Boolean, default: false - }, - // Whether to hide last mentions (hellthreads) - hideMentions: { - required: false, - type: Boolean, - default: false } }, // NEVER EVER TOUCH DATA INSIDE RENDER render (h) { // Pre-process HTML - const { newHtml: html, lastMentions } = preProcessPerLine(this.html, this.greentext, this.hideMentions) + const { newHtml: html, lastMentions } = preProcessPerLine(this.html, this.greentext, this.handleLinks) const firstMentions = [] // Mentions that appear in the beginning of post body const lastTags = [] // Tags that appear at the end of post body const writtenMentions = [] // All mentions that appear in post body @@ -228,8 +223,9 @@ const getLinkData = (attrs, children, index) => { * * @param {String} html - raw HTML to process * @param {Boolean} greentext - whether to enable greentexting or not + * @param {Boolean} handleLinks - whether to handle links or not */ -export const preProcessPerLine = (html, greentext) => { +export const preProcessPerLine = (html, greentext, handleLinks) => { const lastMentions = [] let nonEmptyIndex = 0 @@ -264,6 +260,7 @@ export const preProcessPerLine = (html, greentext) => { const tag = getTagName(opener) // If we have a link we probably have mentions if (tag === 'a') { + if (!handleLinks) return [opener, children, closer] const attrs = getAttrs(opener) if (attrs['class'] && attrs['class'].includes('mention')) { // Got mentions @@ -297,7 +294,7 @@ export const preProcessPerLine = (html, greentext) => { const result = [...tree].map(process) // Only check last (first since list is reversed) line - if (hasMentions && !hasLooseText && nonEmptyIndex++ === 0) { + if (handleLinks && hasMentions && !hasLooseText && nonEmptyIndex++ === 0) { let mentionIndex = 0 const process = (item) => { if (Array.isArray(item)) { diff --git a/src/components/status_body/status_body.vue b/src/components/status_body/status_body.vue index b84541d7..aac44e42 100644 --- a/src/components/status_body/status_body.vue +++ b/src/components/status_body/status_body.vue @@ -52,7 +52,6 @@ :html="status.raw_html" :emoji="status.emojis" :handle-links="true" - :hide-mentions="hideMentions" :greentext="mergedConfig.greentext" @parseReady="setHeadTailLinks" /> diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js index d8f5ecb8..e448d5cd 100644 --- a/src/services/html_converter/html_line_converter.service.js +++ b/src/services/html_converter/html_line_converter.service.js @@ -1,3 +1,5 @@ +import { getTagName } from './utility.service.js' + /** * This is a tiny purpose-built HTML parser/processor. This basically detects * any type of visual newline and converts entire HTML into a array structure. @@ -26,12 +28,6 @@ export const convertHtmlToLines = (html) => { let textBuffer = '' // Current line content let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag - // Extracts tag name from tag, i.e. => span - const getTagName = (tag) => { - const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag) - return result && (result[1] || result[2]) - } - const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer if (textBuffer.trim().length > 0 && !level.some(l => ignoredTags.has(l))) { buffer.push({ text: textBuffer }) diff --git a/src/services/html_converter/html_tree_converter.service.js b/src/services/html_converter/html_tree_converter.service.js index badd473a..804d35d7 100644 --- a/src/services/html_converter/html_tree_converter.service.js +++ b/src/services/html_converter/html_tree_converter.service.js @@ -1,3 +1,5 @@ +import { getTagName } from './utility.service.js' + /** * This is a not-so-tiny purpose-built HTML parser/processor. This parses html * and converts it into a tree structure representing tag openers/closers and @@ -93,54 +95,3 @@ export const convertHtmlToTree = (html) => { flushText() return buffer } - -// Extracts tag name from tag, i.e. => span -export const getTagName = (tag) => { - const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag) - return result && (result[1] || result[2]) -} - -export const processTextForEmoji = (text, emojis, processor) => { - const buffer = [] - let textBuffer = '' - for (let i = 0; i < text.length; i++) { - const char = text[i] - if (char === ':') { - const next = text.slice(i + 1) - let found = false - for (let emoji of emojis) { - if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) { - found = emoji - break - } - } - if (found) { - buffer.push(textBuffer) - textBuffer = '' - buffer.push(processor(found)) - i += found.shortcode.length + 1 - } else { - textBuffer += char - } - } else { - textBuffer += char - } - } - if (textBuffer) buffer.push(textBuffer) - return buffer -} - -export const getAttrs = tag => { - const innertag = tag - .substring(1, tag.length - 1) - .replace(new RegExp('^' + getTagName(tag)), '') - .replace(/\/?$/, '') - .trim() - const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi)) - .map(([trash, key, value]) => [key, value]) - .map(([k, v]) => { - if (!v) return [k, true] - return [k, v.substring(1, v.length - 1)] - }) - return Object.fromEntries(attrs) -} diff --git a/src/services/html_converter/utility.service.js b/src/services/html_converter/utility.service.js new file mode 100644 index 00000000..4d0c36c2 --- /dev/null +++ b/src/services/html_converter/utility.service.js @@ -0,0 +1,73 @@ +/** + * Extract tag name from tag opener/closer. + * + * @param {String} tag - tag string, i.e. '' + * @return {String} - tagname, i.e. "div" + */ +export const getTagName = (tag) => { + const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag) + return result && (result[1] || result[2]) +} + +/** + * Extract attributes from tag opener. + * + * @param {String} tag - tag string, i.e. '' + * @return {Object} - map of attributes key = attribute name, value = attribute value + * attributes without values represented as boolean true + */ +export const getAttrs = tag => { + const innertag = tag + .substring(1, tag.length - 1) + .replace(new RegExp('^' + getTagName(tag)), '') + .replace(/\/?$/, '') + .trim() + const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi)) + .map(([trash, key, value]) => [key, value]) + .map(([k, v]) => { + if (!v) return [k, true] + return [k, v.substring(1, v.length - 1)] + }) + return Object.fromEntries(attrs) +} + +/** + * Finds shortcodes in text + * + * @param {String} text - original text to find emojis in + * @param {{ url: String, shortcode: Sring }[]} emoji - list of shortcodes to find + * @param {Function} processor - function to call on each encountered emoji, + * function is passed single object containing matching emoji ({ url, shortcode }) + * return value will be inserted into resulting array instead of :shortcode: + * @return {Array} resulting array with non-emoji parts of text and whatever {processor} + * returned for emoji + */ +export const processTextForEmoji = (text, emojis, processor) => { + const buffer = [] + let textBuffer = '' + for (let i = 0; i < text.length; i++) { + const char = text[i] + if (char === ':') { + const next = text.slice(i + 1) + let found = false + for (let emoji of emojis) { + if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) { + found = emoji + break + } + } + if (found) { + buffer.push(textBuffer) + textBuffer = '' + buffer.push(processor(found)) + i += found.shortcode.length + 1 + } else { + textBuffer += char + } + } else { + textBuffer += char + } + } + if (textBuffer) buffer.push(textBuffer) + return buffer +} diff --git a/test/unit/specs/components/rich_content.spec.js b/test/unit/specs/components/rich_content.spec.js new file mode 100644 index 00000000..05c0b259 --- /dev/null +++ b/test/unit/specs/components/rich_content.spec.js @@ -0,0 +1,357 @@ +import { shallowMount, createLocalVue } from '@vue/test-utils' +import RichContent from 'src/components/rich_content/rich_content.jsx' + +const localVue = createLocalVue() + +const makeMention = (who) => `@${who}` +const stubMention = (who) => `` +const lastMentions = (...data) => `${data.join('')}` +const p = (...data) => `

${data.join('')}

` +const compwrap = (...data) => `${data.join('')}` +const removedMentionSpan = '' + +describe('RichContent', () => { + it('renders simple post without exploding', () => { + const html = p('Hello world!') + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: true, + greentext: true, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(html)) + }) + + it('removes mentions from the beginning of post', () => { + const html = p( + makeMention('John'), + ' how are you doing thoday?' + ) + const expected = p( + removedMentionSpan, + 'how are you doing thoday?' + ) + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: true, + greentext: true, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(expected)) + }) + + it('removes mentions from the end of the hellpost (

)', () => { + const html = [ + p('How are you doing today, fine gentlemen?'), + p( + makeMention('John'), + makeMention('Josh'), + makeMention('Jeremy') + ) + ].join('') + const expected = [ + p( + 'How are you doing today, fine gentlemen?' + ), + // TODO fix this extra line somehow? + p() + ].join('') + + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: true, + greentext: true, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(expected)) + }) + + it('removes mentions from the end of the hellpost (
)', () => { + const html = [ + 'How are you doing today, fine gentlemen?', + [ + makeMention('John'), + makeMention('Josh'), + makeMention('Jeremy') + ].join('') + ].join('
') + const expected = [ + 'How are you doing today, fine gentlemen?', + // TODO fix this extra line somehow? + '
' + ].join('') + + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: true, + greentext: true, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(expected)) + }) + + it('removes mentions from the end of the hellpost (\\n)', () => { + const html = [ + 'How are you doing today, fine gentlemen?', + [ + makeMention('John'), + makeMention('Josh'), + makeMention('Jeremy') + ].join('') + ].join('\n') + const expected = [ + 'How are you doing today, fine gentlemen?', + // TODO fix this extra line somehow? + '' + ].join('\n') + + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: true, + greentext: true, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(expected)) + }) + + it('Does not remove mentions in the middle or at the end of text string', () => { + const html = [ + [ + makeMention('Jack'), + 'let\'s meet up with ', + makeMention('Janet') + ].join(''), + [ + 'cc: ', + makeMention('John'), + makeMention('Josh'), + makeMention('Jeremy') + ].join('') + ].join('\n') + const expected = [ + [ + removedMentionSpan, + 'let\'s meet up with ', + stubMention('Janet') + ].join(''), + [ + 'cc: ', + stubMention('John'), + stubMention('Josh'), + stubMention('Jeremy') + ].join('') + ].join('\n') + + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: true, + greentext: true, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(expected)) + }) + + it('removes mentions from the end if there\'s only one first mention', () => { + const html = [ + p( + makeMention('Todd'), + 'so anyway you are wrong' + ), + p( + makeMention('Tom'), + makeMention('Trace'), + makeMention('Theodor') + ) + ].join('') + const expected = [ + p( + removedMentionSpan, + 'so anyway you are wrong' + ), + // TODO fix this extra line somehow? + p() + ].join('') + + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: true, + greentext: true, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(expected)) + }) + + it('does not remove mentions from the end if there\'s more than one first mention', () => { + const html = [ + p( + makeMention('Zacharie'), + makeMention('Zinaide'), + 'you guys have cool names, and so do these guys: ' + ), + p( + makeMention('Watson'), + makeMention('Wallace'), + makeMention('Wakamoto') + ) + ].join('') + const expected = [ + p( + removedMentionSpan, + removedMentionSpan, + 'you guys have cool names, and so do these guys: ' + ), + p( + lastMentions( + stubMention('Watson'), + stubMention('Wallace'), + stubMention('Wakamoto') + ) + ) + ].join('') + + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: true, + greentext: true, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(expected)) + }) + + it('Does not touch links if link handling is disabled', () => { + const html = [ + [ + makeMention('Jack'), + 'let\'s meet up with ', + makeMention('Janet') + ].join(''), + [ + makeMention('John'), + makeMention('Josh'), + makeMention('Jeremy') + ].join('') + ].join('\n') + + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: false, + greentext: true, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(html)) + }) + + it('Adds greentext and cyantext to the post', () => { + const html = [ + '>preordering videogames', + '>any year' + ].join('\n') + const expected = [ + '>preordering videogames', + '>any year' + ].join('\n') + + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: false, + greentext: true, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(expected)) + }) + + it('Does not add greentext and cyantext if setting is set to false', () => { + const html = [ + '>preordering videogames', + '>any year' + ].join('\n') + + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: false, + greentext: false, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(html)) + }) + + it('Adds emoji to post', () => { + const html = p('Ebin :DDDD :spurdo:') + const expected = p( + 'Ebin :DDDD ', + '' + ) + + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: false, + greentext: false, + emoji: [{ url: 'about:blank', shortcode: 'spurdo' }], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(expected)) + }) + + it('Doesn\'t add nonexistent emoji to post', () => { + const html = p('Lol :lol:') + + const wrapper = shallowMount(RichContent, { + localVue, + propsData: { + handleLinks: false, + greentext: false, + emoji: [], + html + } + }) + + expect(wrapper.html()).to.eql(compwrap(html)) + }) +}) diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js index 532ea187..9485233f 100644 --- a/test/unit/specs/services/html_converter/html_line_converter.spec.js +++ b/test/unit/specs/services/html_converter/html_line_converter.spec.js @@ -2,7 +2,7 @@ import { convertHtmlToLines } from 'src/services/html_converter/html_line_conver const mapOnlyText = (processor) => (input) => input.text ? processor(input.text) : input -describe('TinyPostHTMLProcessor', () => { +describe('html_line_converter', () => { describe('with processor that keeps original line should not make any changes to HTML when', () => { const processorKeep = (line) => line it('fed with regular HTML with newlines', () => { diff --git a/test/unit/specs/services/html_converter/html_tree_converter.spec.js b/test/unit/specs/services/html_converter/html_tree_converter.spec.js index a54745c3..7283021b 100644 --- a/test/unit/specs/services/html_converter/html_tree_converter.spec.js +++ b/test/unit/specs/services/html_converter/html_tree_converter.spec.js @@ -1,6 +1,6 @@ -import { convertHtmlToTree, processTextForEmoji, getAttrs } from 'src/services/html_converter/html_tree_converter.service.js' +import { convertHtmlToTree } from 'src/services/html_converter/html_tree_converter.service.js' -describe('MiniHtmlConverter', () => { +describe('html_tree_converter', () => { describe('convertHtmlToTree', () => { it('converts html into a tree structure', () => { const input = '1

2

345' @@ -129,38 +129,4 @@ describe('MiniHtmlConverter', () => { ]) }) }) - - describe('processTextForEmoji', () => { - it('processes all emoji in text', () => { - const input = 'Hello from finland! :lol: We have best water! :lmao:' - const emojis = [ - { shortcode: 'lol', src: 'LOL' }, - { shortcode: 'lmao', src: 'LMAO' } - ] - const processor = ({ shortcode, src }) => ({ shortcode, src }) - expect(processTextForEmoji(input, emojis, processor)).to.eql([ - 'Hello from finland! ', - { shortcode: 'lol', src: 'LOL' }, - ' We have best water! ', - { shortcode: 'lmao', src: 'LMAO' } - ]) - }) - it('leaves text as is', () => { - const input = 'Number one: that\'s terror' - const emojis = [] - const processor = ({ shortcode, src }) => ({ shortcode, src }) - expect(processTextForEmoji(input, emojis, processor)).to.eql([ - 'Number one: that\'s terror' - ]) - }) - }) - - describe('getAttrs', () => { - it('extracts arguments from tag', () => { - const input = '' - const output = { src: 'boop', cool: true, ebin: 'true' } - - expect(getAttrs(input)).to.eql(output) - }) - }) }) diff --git a/test/unit/specs/services/html_converter/utility.spec.js b/test/unit/specs/services/html_converter/utility.spec.js new file mode 100644 index 00000000..cf6fd99b --- /dev/null +++ b/test/unit/specs/services/html_converter/utility.spec.js @@ -0,0 +1,37 @@ +import { processTextForEmoji, getAttrs } from 'src/services/html_converter/utility.service.js' + +describe('html_converter utility', () => { + describe('processTextForEmoji', () => { + it('processes all emoji in text', () => { + const input = 'Hello from finland! :lol: We have best water! :lmao:' + const emojis = [ + { shortcode: 'lol', src: 'LOL' }, + { shortcode: 'lmao', src: 'LMAO' } + ] + const processor = ({ shortcode, src }) => ({ shortcode, src }) + expect(processTextForEmoji(input, emojis, processor)).to.eql([ + 'Hello from finland! ', + { shortcode: 'lol', src: 'LOL' }, + ' We have best water! ', + { shortcode: 'lmao', src: 'LMAO' } + ]) + }) + it('leaves text as is', () => { + const input = 'Number one: that\'s terror' + const emojis = [] + const processor = ({ shortcode, src }) => ({ shortcode, src }) + expect(processTextForEmoji(input, emojis, processor)).to.eql([ + 'Number one: that\'s terror' + ]) + }) + }) + + describe('getAttrs', () => { + it('extracts arguments from tag', () => { + const input = '' + const output = { src: 'boop', cool: true, ebin: 'true' } + + expect(getAttrs(input)).to.eql(output) + }) + }) +}) -- cgit v1.2.3-70-g09d2 From bebafa1a2c38972245d37de70f4aec4bfb2083fd Mon Sep 17 00:00:00 2001 From: Henry Jameson Date: Sun, 13 Jun 2021 13:29:26 +0300 Subject: refactored line converter, untied its logic from greentexting, better handling of broken cases --- src/components/rich_content/rich_content.jsx | 10 +++- .../html_converter/html_line_converter.service.js | 55 ++++++++++++++++++---- .../html_converter/html_line_converter.spec.js | 19 ++++++-- 3 files changed, 68 insertions(+), 16 deletions(-) (limited to 'src/services/html_converter') diff --git a/src/components/rich_content/rich_content.jsx b/src/components/rich_content/rich_content.jsx index e188763f..328e9201 100644 --- a/src/components/rich_content/rich_content.jsx +++ b/src/components/rich_content/rich_content.jsx @@ -246,6 +246,7 @@ const getLinkData = (attrs, children, index) => { */ export const preProcessPerLine = (html, greentext, handleLinks) => { const lastMentions = [] + const greentextHandle = new Set(['p', 'div']) let nonEmptyIndex = -1 const newHtml = convertHtmlToLines(html).reverse().map((item, index, array) => { @@ -256,7 +257,14 @@ export const preProcessPerLine = (html, greentext, handleLinks) => { nonEmptyIndex += 1 // Greentext stuff - if (greentext && (string.includes('>') || string.includes('<'))) { + if ( + // Only if greentext is engaged + greentext && + // Only handle p's and divs. Don't want to affect blocquotes, code etc + item.level.every(l => greentextHandle.has(l)) && + // Only if line begins with '>' or '<' + (string.includes('>') || string.includes('<')) + ) { const cleanedString = string.replace(/<[^>]+?>/gi, '') // remove all tags .replace(/@\w+/gi, '') // remove mentions (even failed ones) .trim() diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js index e448d5cd..f43d162a 100644 --- a/src/services/html_converter/html_line_converter.service.js +++ b/src/services/html_converter/html_line_converter.service.js @@ -19,9 +19,42 @@ import { getTagName } from './utility.service.js' * @return {(string|{ text: string })[]} processed html in form of a list. */ export const convertHtmlToLines = (html) => { - const ignoredTags = new Set(['code', 'blockquote']) - const handledTags = new Set(['p', 'br', 'div', 'pre', 'code', 'blockquote']) - const openCloseTags = new Set(['p', 'div', 'pre', 'code', 'blockquote']) + // Elements that are implicitly self-closing + // https://developer.mozilla.org/en-US/docs/Glossary/empty_element + const emptyElements = new Set([ + 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', + 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr' + ]) + // Block-level element (they make a visual line) + // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements + const blockElements = new Set([ + 'address', 'article', 'aside', 'blockquote', 'details', 'dialog', 'dd', + 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', + 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'main', + 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul' + ]) + // br is very weird in a way that it's technically not block-level, it's + // essentially converted to a \n (or \r\n). There's also wbr but it doesn't + // guarantee linebreak, only suggest it. + const linebreakElements = new Set(['br']) + + const visualLineElements = new Set([ + ...blockElements.values(), + ...linebreakElements.values() + ]) + + // All block-level elements that aren't empty elements, i.e. not
+ const nonEmptyElements = new Set(visualLineElements) + // Difference + for (let elem of emptyElements) { + nonEmptyElements.delete(elem) + } + + // All elements that we are recognizing + const allElements = new Set([ + ...nonEmptyElements.values(), + ...emptyElements.values() + ]) let buffer = [] // Current output buffer const level = [] // How deep we are in tags and which tags were there @@ -29,8 +62,8 @@ export const convertHtmlToLines = (html) => { let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer - if (textBuffer.trim().length > 0 && !level.some(l => ignoredTags.has(l))) { - buffer.push({ text: textBuffer }) + if (textBuffer.trim().length > 0) { + buffer.push({ level: [...level], text: textBuffer }) } else { buffer.push(textBuffer) } @@ -49,10 +82,12 @@ export const convertHtmlToLines = (html) => { } const handleClose = (tag) => { // handles closing tags - flush() - buffer.push(tag) if (level[0] === getTagName(tag)) { + flush() + buffer.push(tag) level.shift() + } else { // Broken case + textBuffer += tag } } @@ -67,10 +102,10 @@ export const convertHtmlToLines = (html) => { const tagFull = tagBuffer tagBuffer = null const tagName = getTagName(tagFull) - if (handledTags.has(tagName)) { - if (tagName === 'br') { + if (allElements.has(tagName)) { + if (linebreakElements.has(tagName)) { handleBr(tagFull) - } else if (openCloseTags.has(tagName)) { + } else if (nonEmptyElements.has(tagName)) { if (tagFull[1] === '/') { handleClose(tagFull) } else if (tagFull[tagFull.length - 2] === '/') { diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js index 9485233f..c8c89700 100644 --- a/test/unit/specs/services/html_converter/html_line_converter.spec.js +++ b/test/unit/specs/services/html_converter/html_line_converter.spec.js @@ -1,8 +1,17 @@ import { convertHtmlToLines } from 'src/services/html_converter/html_line_converter.service.js' -const mapOnlyText = (processor) => (input) => input.text ? processor(input.text) : input - -describe('html_line_converter', () => { +const greentextHandle = new Set(['p', 'div']) +const mapOnlyText = (processor) => (input) => { + if (input.text && input.level.every(l => greentextHandle.has(l))) { + return processor(input.text) + } else if (input.text) { + return input.text + } else { + return input + } +} + +describe.only('html_line_converter', () => { describe('with processor that keeps original line should not make any changes to HTML when', () => { const processorKeep = (line) => line it('fed with regular HTML with newlines', () => { @@ -81,7 +90,7 @@ describe('html_line_converter', () => { it('fed with very broken HTML with broken composition', () => { const input = '

lmao what whats going on
wha

' - const output = '

_
_
_

' + const output = '_

_

' const result = convertHtmlToLines(input) const comparableResult = result.map(mapOnlyText(processorReplace)).join('') expect(comparableResult).to.eql(output) @@ -111,7 +120,7 @@ describe('html_line_converter', () => { expect(comparableResult).to.eql(output) }) - it('fed with maybe valid HTML? self-closing divs and ps', () => { + it('fed with maybe valid HTML? (XHTML) self-closing divs and ps', () => { const input = 'a

what now

?' const output = '_

_

_' const result = convertHtmlToLines(input) -- cgit v1.2.3-70-g09d2 From 8fe4355a6b84ae81b54228e6749a4ab82966ff2e Mon Sep 17 00:00:00 2001 From: Henry Jameson Date: Fri, 18 Jun 2021 21:29:47 +0300 Subject: fix rich images --- src/services/html_converter/html_line_converter.service.js | 2 ++ .../unit/specs/services/html_converter/html_line_converter.spec.js | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'src/services/html_converter') diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js index f43d162a..74103b02 100644 --- a/src/services/html_converter/html_line_converter.service.js +++ b/src/services/html_converter/html_line_converter.service.js @@ -114,6 +114,8 @@ export const convertHtmlToLines = (html) => { } else { handleOpen(tagFull) } + } else { + textBuffer += tagFull } } else { textBuffer += tagFull diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js index de7c7fc2..86bd7e8b 100644 --- a/test/unit/specs/services/html_converter/html_line_converter.spec.js +++ b/test/unit/specs/services/html_converter/html_line_converter.spec.js @@ -69,6 +69,13 @@ describe('html_line_converter', () => { const comparableResult = result.map(mapOnlyText(processorKeep)).join('') expect(comparableResult).to.eql(inputOutput) }) + + it('fed with some recognized but not handled elements', () => { + const inputOutput = 'testing images\n\n' + const result = convertHtmlToLines(inputOutput) + const comparableResult = result.map(mapOnlyText(processorKeep)).join('') + expect(comparableResult).to.eql(inputOutput) + }) }) describe('with processor that replaces lines with word "_" should match expected line when', () => { const processorReplace = (line) => '_' -- cgit v1.2.3-70-g09d2 From f16658adfc897a3b07ed7f79d872acd2c3837cc8 Mon Sep 17 00:00:00 2001 From: Henry Jameson Date: Sun, 15 Aug 2021 02:59:14 +0300 Subject: fix tests --- src/services/html_converter/html_line_converter.service.js | 2 +- src/services/html_converter/html_tree_converter.service.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/services/html_converter') diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js index 74103b02..5eeaa7cb 100644 --- a/src/services/html_converter/html_line_converter.service.js +++ b/src/services/html_converter/html_line_converter.service.js @@ -18,7 +18,7 @@ import { getTagName } from './utility.service.js' * @param {Object} input - input data * @return {(string|{ text: string })[]} processed html in form of a list. */ -export const convertHtmlToLines = (html) => { +export const convertHtmlToLines = (html = '') => { // Elements that are implicitly self-closing // https://developer.mozilla.org/en-US/docs/Glossary/empty_element const emptyElements = new Set([ diff --git a/src/services/html_converter/html_tree_converter.service.js b/src/services/html_converter/html_tree_converter.service.js index 804d35d7..6a8796c4 100644 --- a/src/services/html_converter/html_tree_converter.service.js +++ b/src/services/html_converter/html_tree_converter.service.js @@ -19,7 +19,7 @@ import { getTagName } from './utility.service.js' * @param {Object} input - input data * @return {string} processed html */ -export const convertHtmlToTree = (html) => { +export const convertHtmlToTree = (html = '') => { // Elements that are implicitly self-closing // https://developer.mozilla.org/en-US/docs/Glossary/empty_element const emptyElements = new Set([ -- cgit v1.2.3-70-g09d2