From cc00af7a3102034b05ebcd4aa1fd01c6f467184a Mon Sep 17 00:00:00 2001
From: Henry Jameson
Date: Thu, 10 Jun 2021 18:52:01 +0300
Subject: Hellthread(tm) Certified
---
.../html_converter/html_line_converter.service.js | 102 ++++++++++++++
.../html_converter/html_tree_converter.service.js | 146 +++++++++++++++++++++
2 files changed, 248 insertions(+)
create mode 100644 src/services/html_converter/html_line_converter.service.js
create mode 100644 src/services/html_converter/html_tree_converter.service.js
(limited to 'src/services/html_converter')
diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js
new file mode 100644
index 00000000..80482c9a
--- /dev/null
+++ b/src/services/html_converter/html_line_converter.service.js
@@ -0,0 +1,102 @@
+/**
+ * This is a tiny purpose-built HTML parser/processor. This basically detects
+ * any type of visual newline and converts entire HTML into a array structure.
+ *
+ * Text nodes are represented as object with single property - text - containing
+ * the visual line. Intended usage is to process the array with .map() in which
+ * map function returns a string and resulting array can be converted back to html
+ * with a .join('').
+ *
+ * Generally this isn't very useful except for when you really need to either
+ * modify visual lines (greentext i.e. simple quoting) or do something with
+ * first/last line.
+ *
+ * known issue: doesn't handle CDATA so nested CDATA might not work well
+ *
+ * @param {Object} input - input data
+ * @return {(string|{ text: string })[]} processed html in form of a list.
+ */
+export const convertHtmlToLines = (html) => {
+ const handledTags = new Set(['p', 'br', 'div'])
+ const openCloseTags = new Set(['p', 'div'])
+
+ let buffer = [] // Current output buffer
+ const level = [] // How deep we are in tags and which tags were there
+ let textBuffer = '' // Current line content
+ let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
+
+ // Extracts tag name from tag, i.e. => span
+ const getTagName = (tag) => {
+ const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag)
+ return result && (result[1] || result[2])
+ }
+
+ const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
+ if (textBuffer.trim().length > 0) {
+ buffer.push({ text: textBuffer })
+ } else {
+ buffer.push(textBuffer)
+ }
+ textBuffer = ''
+ }
+
+ const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing
+ flush()
+ buffer.push(tag)
+ }
+
+ const handleOpen = (tag) => { // handles opening tags
+ flush()
+ buffer.push(tag)
+ level.push(tag)
+ }
+
+ const handleClose = (tag) => { // handles closing tags
+ flush()
+ buffer.push(tag)
+ if (level[level.length - 1] === tag) {
+ level.pop()
+ }
+ }
+
+ for (let i = 0; i < html.length; i++) {
+ const char = html[i]
+ if (char === '<' && tagBuffer === null) {
+ tagBuffer = char
+ } else if (char !== '>' && tagBuffer !== null) {
+ tagBuffer += char
+ } else if (char === '>' && tagBuffer !== null) {
+ tagBuffer += char
+ const tagFull = tagBuffer
+ tagBuffer = null
+ const tagName = getTagName(tagFull)
+ if (handledTags.has(tagName)) {
+ if (tagName === 'br') {
+ handleBr(tagFull)
+ } else if (openCloseTags.has(tagName)) {
+ if (tagFull[1] === '/') {
+ handleClose(tagFull)
+ } else if (tagFull[tagFull.length - 2] === '/') {
+ // self-closing
+ handleBr(tagFull)
+ } else {
+ handleOpen(tagFull)
+ }
+ }
+ } else {
+ textBuffer += tagFull
+ }
+ } else if (char === '\n') {
+ handleBr(char)
+ } else {
+ textBuffer += char
+ }
+ }
+ if (tagBuffer) {
+ textBuffer += tagBuffer
+ }
+
+ flush()
+
+ return buffer
+}
diff --git a/src/services/html_converter/html_tree_converter.service.js b/src/services/html_converter/html_tree_converter.service.js
new file mode 100644
index 00000000..badd473a
--- /dev/null
+++ b/src/services/html_converter/html_tree_converter.service.js
@@ -0,0 +1,146 @@
+/**
+ * This is a not-so-tiny purpose-built HTML parser/processor. This parses html
+ * and converts it into a tree structure representing tag openers/closers and
+ * children.
+ *
+ * Structure follows this pattern: [opener, [...children], closer] except root
+ * node which is just [...children]. Text nodes can only be within children and
+ * are represented as strings.
+ *
+ * Intended use is to convert HTML structure and then recursively iterate over it
+ * most likely using a map. Very useful for dynamically rendering html replacing
+ * tags with JSX elements in a render function.
+ *
+ * known issue: doesn't handle CDATA so CDATA might not work well
+ * known issue: doesn't handle HTML comments
+ *
+ * @param {Object} input - input data
+ * @return {string} processed html
+ */
+export const convertHtmlToTree = (html) => {
+ // Elements that are implicitly self-closing
+ // https://developer.mozilla.org/en-US/docs/Glossary/empty_element
+ const emptyElements = new Set([
+ 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
+ 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
+ ])
+ // TODO For future - also parse HTML5 multi-source components?
+
+ const buffer = [] // Current output buffer
+ const levels = [['', buffer]] // How deep we are in tags and which tags were there
+ let textBuffer = '' // Current line content
+ let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
+
+ const getCurrentBuffer = () => {
+ return levels[levels.length - 1][1]
+ }
+
+ const flushText = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
+ if (textBuffer === '') return
+ getCurrentBuffer().push(textBuffer)
+ textBuffer = ''
+ }
+
+ const handleSelfClosing = (tag) => {
+ getCurrentBuffer().push([tag])
+ }
+
+ const handleOpen = (tag) => {
+ const curBuf = getCurrentBuffer()
+ const newLevel = [tag, []]
+ levels.push(newLevel)
+ curBuf.push(newLevel)
+ }
+
+ const handleClose = (tag) => {
+ const currentTag = levels[levels.length - 1]
+ if (getTagName(levels[levels.length - 1][0]) === getTagName(tag)) {
+ currentTag.push(tag)
+ levels.pop()
+ } else {
+ getCurrentBuffer().push(tag)
+ }
+ }
+
+ for (let i = 0; i < html.length; i++) {
+ const char = html[i]
+ if (char === '<' && tagBuffer === null) {
+ flushText()
+ tagBuffer = char
+ } else if (char !== '>' && tagBuffer !== null) {
+ tagBuffer += char
+ } else if (char === '>' && tagBuffer !== null) {
+ tagBuffer += char
+ const tagFull = tagBuffer
+ tagBuffer = null
+ const tagName = getTagName(tagFull)
+ if (tagFull[1] === '/') {
+ handleClose(tagFull)
+ } else if (emptyElements.has(tagName) || tagFull[tagFull.length - 2] === '/') {
+ // self-closing
+ handleSelfClosing(tagFull)
+ } else {
+ handleOpen(tagFull)
+ }
+ } else {
+ textBuffer += char
+ }
+ }
+ if (tagBuffer) {
+ textBuffer += tagBuffer
+ }
+
+ flushText()
+ return buffer
+}
+
+// Extracts tag name from tag, i.e. => span
+export const getTagName = (tag) => {
+ const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag)
+ return result && (result[1] || result[2])
+}
+
+export const processTextForEmoji = (text, emojis, processor) => {
+ const buffer = []
+ let textBuffer = ''
+ for (let i = 0; i < text.length; i++) {
+ const char = text[i]
+ if (char === ':') {
+ const next = text.slice(i + 1)
+ let found = false
+ for (let emoji of emojis) {
+ if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) {
+ found = emoji
+ break
+ }
+ }
+ if (found) {
+ buffer.push(textBuffer)
+ textBuffer = ''
+ buffer.push(processor(found))
+ i += found.shortcode.length + 1
+ } else {
+ textBuffer += char
+ }
+ } else {
+ textBuffer += char
+ }
+ }
+ if (textBuffer) buffer.push(textBuffer)
+ return buffer
+}
+
+export const getAttrs = tag => {
+ const innertag = tag
+ .substring(1, tag.length - 1)
+ .replace(new RegExp('^' + getTagName(tag)), '')
+ .replace(/\/?$/, '')
+ .trim()
+ const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi))
+ .map(([trash, key, value]) => [key, value])
+ .map(([k, v]) => {
+ if (!v) return [k, true]
+ return [k, v.substring(1, v.length - 1)]
+ })
+ return Object.fromEntries(attrs)
+}
--
cgit v1.2.3-70-g09d2
From 5834790d0b38d487e314e8419509d162abbd6a80 Mon Sep 17 00:00:00 2001
From: Henry Jameson
Date: Fri, 11 Jun 2021 11:50:05 +0300
Subject: fix #935
---
.../html_converter/html_line_converter.service.js | 13 +++++------
.../html_converter/html_line_converter.spec.js | 25 ++++++++++++++++++++++
2 files changed, 32 insertions(+), 6 deletions(-)
(limited to 'src/services/html_converter')
diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js
index 80482c9a..d8f5ecb8 100644
--- a/src/services/html_converter/html_line_converter.service.js
+++ b/src/services/html_converter/html_line_converter.service.js
@@ -17,8 +17,9 @@
* @return {(string|{ text: string })[]} processed html in form of a list.
*/
export const convertHtmlToLines = (html) => {
- const handledTags = new Set(['p', 'br', 'div'])
- const openCloseTags = new Set(['p', 'div'])
+ const ignoredTags = new Set(['code', 'blockquote'])
+ const handledTags = new Set(['p', 'br', 'div', 'pre', 'code', 'blockquote'])
+ const openCloseTags = new Set(['p', 'div', 'pre', 'code', 'blockquote'])
let buffer = [] // Current output buffer
const level = [] // How deep we are in tags and which tags were there
@@ -32,7 +33,7 @@ export const convertHtmlToLines = (html) => {
}
const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
- if (textBuffer.trim().length > 0) {
+ if (textBuffer.trim().length > 0 && !level.some(l => ignoredTags.has(l))) {
buffer.push({ text: textBuffer })
} else {
buffer.push(textBuffer)
@@ -48,14 +49,14 @@ export const convertHtmlToLines = (html) => {
const handleOpen = (tag) => { // handles opening tags
flush()
buffer.push(tag)
- level.push(tag)
+ level.unshift(getTagName(tag))
}
const handleClose = (tag) => { // handles closing tags
flush()
buffer.push(tag)
- if (level[level.length - 1] === tag) {
- level.pop()
+ if (level[0] === getTagName(tag)) {
+ level.shift()
}
}
diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js
index 82cb4170..532ea187 100644
--- a/test/unit/specs/services/html_converter/html_line_converter.spec.js
+++ b/test/unit/specs/services/html_converter/html_line_converter.spec.js
@@ -126,5 +126,30 @@ describe('TinyPostHTMLProcessor', () => {
const comparableResult = result.map(mapOnlyText(processorReplace)).join('')
expect(comparableResult).to.eql(output)
})
+
+ it('Testing handling ignored blocks', () => {
+ const input = `
+ > rei = "0"
+ '0'
+ > rei == 0
+ true
+ > rei == null
+ false
That, christian-like JS diagram but it’s evangelion instead.
+ `
+ const result = convertHtmlToLines(input)
+ const comparableResult = result.map(mapOnlyText(processorReplace)).join('')
+ expect(comparableResult).to.eql(input)
+ })
+ it('Testing handling ignored blocks 2', () => {
+ const input = `
+ An SSL error has happened.
Shakespeare
+ `
+ const output = `
+ An SSL error has happened.
_
+ `
+ const result = convertHtmlToLines(input)
+ const comparableResult = result.map(mapOnlyText(processorReplace)).join('')
+ expect(comparableResult).to.eql(output)
+ })
})
})
--
cgit v1.2.3-70-g09d2
From cd4455675024a3dfc8930184114d5f92438d0466 Mon Sep 17 00:00:00 2001
From: Henry Jameson
Date: Sat, 12 Jun 2021 19:47:23 +0300
Subject: restructure and tests
squash! restructure and tests
---
src/components/rich_content/rich_content.jsx | 17 +-
src/components/status_body/status_body.vue | 1 -
.../html_converter/html_line_converter.service.js | 8 +-
.../html_converter/html_tree_converter.service.js | 53 +--
src/services/html_converter/utility.service.js | 73 +++++
test/unit/specs/components/rich_content.spec.js | 357 +++++++++++++++++++++
.../html_converter/html_line_converter.spec.js | 2 +-
.../html_converter/html_tree_converter.spec.js | 38 +--
.../specs/services/html_converter/utility.spec.js | 37 +++
9 files changed, 481 insertions(+), 105 deletions(-)
create mode 100644 src/services/html_converter/utility.service.js
create mode 100644 test/unit/specs/components/rich_content.spec.js
create mode 100644 test/unit/specs/services/html_converter/utility.spec.js
(limited to 'src/services/html_converter')
diff --git a/src/components/rich_content/rich_content.jsx b/src/components/rich_content/rich_content.jsx
index ad77d615..ef15aaeb 100644
--- a/src/components/rich_content/rich_content.jsx
+++ b/src/components/rich_content/rich_content.jsx
@@ -1,6 +1,7 @@
import Vue from 'vue'
import { unescape, flattenDeep } from 'lodash'
-import { convertHtmlToTree, getTagName, processTextForEmoji, getAttrs } from 'src/services/html_converter/html_tree_converter.service.js'
+import { getTagName, processTextForEmoji, getAttrs } from 'src/services/html_converter/utility.service.js'
+import { convertHtmlToTree } from 'src/services/html_converter/html_tree_converter.service.js'
import { convertHtmlToLines } from 'src/services/html_converter/html_line_converter.service.js'
import StillImage from 'src/components/still-image/still-image.vue'
import MentionLink from 'src/components/mention_link/mention_link.vue'
@@ -31,18 +32,12 @@ export default Vue.component('RichContent', {
required: false,
type: Boolean,
default: false
- },
- // Whether to hide last mentions (hellthreads)
- hideMentions: {
- required: false,
- type: Boolean,
- default: false
}
},
// NEVER EVER TOUCH DATA INSIDE RENDER
render (h) {
// Pre-process HTML
- const { newHtml: html, lastMentions } = preProcessPerLine(this.html, this.greentext, this.hideMentions)
+ const { newHtml: html, lastMentions } = preProcessPerLine(this.html, this.greentext, this.handleLinks)
const firstMentions = [] // Mentions that appear in the beginning of post body
const lastTags = [] // Tags that appear at the end of post body
const writtenMentions = [] // All mentions that appear in post body
@@ -228,8 +223,9 @@ const getLinkData = (attrs, children, index) => {
*
* @param {String} html - raw HTML to process
* @param {Boolean} greentext - whether to enable greentexting or not
+ * @param {Boolean} handleLinks - whether to handle links or not
*/
-export const preProcessPerLine = (html, greentext) => {
+export const preProcessPerLine = (html, greentext, handleLinks) => {
const lastMentions = []
let nonEmptyIndex = 0
@@ -264,6 +260,7 @@ export const preProcessPerLine = (html, greentext) => {
const tag = getTagName(opener)
// If we have a link we probably have mentions
if (tag === 'a') {
+ if (!handleLinks) return [opener, children, closer]
const attrs = getAttrs(opener)
if (attrs['class'] && attrs['class'].includes('mention')) {
// Got mentions
@@ -297,7 +294,7 @@ export const preProcessPerLine = (html, greentext) => {
const result = [...tree].map(process)
// Only check last (first since list is reversed) line
- if (hasMentions && !hasLooseText && nonEmptyIndex++ === 0) {
+ if (handleLinks && hasMentions && !hasLooseText && nonEmptyIndex++ === 0) {
let mentionIndex = 0
const process = (item) => {
if (Array.isArray(item)) {
diff --git a/src/components/status_body/status_body.vue b/src/components/status_body/status_body.vue
index b84541d7..aac44e42 100644
--- a/src/components/status_body/status_body.vue
+++ b/src/components/status_body/status_body.vue
@@ -52,7 +52,6 @@
:html="status.raw_html"
:emoji="status.emojis"
:handle-links="true"
- :hide-mentions="hideMentions"
:greentext="mergedConfig.greentext"
@parseReady="setHeadTailLinks"
/>
diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js
index d8f5ecb8..e448d5cd 100644
--- a/src/services/html_converter/html_line_converter.service.js
+++ b/src/services/html_converter/html_line_converter.service.js
@@ -1,3 +1,5 @@
+import { getTagName } from './utility.service.js'
+
/**
* This is a tiny purpose-built HTML parser/processor. This basically detects
* any type of visual newline and converts entire HTML into a array structure.
@@ -26,12 +28,6 @@ export const convertHtmlToLines = (html) => {
let textBuffer = '' // Current line content
let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
- // Extracts tag name from tag, i.e. => span
- const getTagName = (tag) => {
- const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag)
- return result && (result[1] || result[2])
- }
-
const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
if (textBuffer.trim().length > 0 && !level.some(l => ignoredTags.has(l))) {
buffer.push({ text: textBuffer })
diff --git a/src/services/html_converter/html_tree_converter.service.js b/src/services/html_converter/html_tree_converter.service.js
index badd473a..804d35d7 100644
--- a/src/services/html_converter/html_tree_converter.service.js
+++ b/src/services/html_converter/html_tree_converter.service.js
@@ -1,3 +1,5 @@
+import { getTagName } from './utility.service.js'
+
/**
* This is a not-so-tiny purpose-built HTML parser/processor. This parses html
* and converts it into a tree structure representing tag openers/closers and
@@ -93,54 +95,3 @@ export const convertHtmlToTree = (html) => {
flushText()
return buffer
}
-
-// Extracts tag name from tag, i.e. => span
-export const getTagName = (tag) => {
- const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag)
- return result && (result[1] || result[2])
-}
-
-export const processTextForEmoji = (text, emojis, processor) => {
- const buffer = []
- let textBuffer = ''
- for (let i = 0; i < text.length; i++) {
- const char = text[i]
- if (char === ':') {
- const next = text.slice(i + 1)
- let found = false
- for (let emoji of emojis) {
- if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) {
- found = emoji
- break
- }
- }
- if (found) {
- buffer.push(textBuffer)
- textBuffer = ''
- buffer.push(processor(found))
- i += found.shortcode.length + 1
- } else {
- textBuffer += char
- }
- } else {
- textBuffer += char
- }
- }
- if (textBuffer) buffer.push(textBuffer)
- return buffer
-}
-
-export const getAttrs = tag => {
- const innertag = tag
- .substring(1, tag.length - 1)
- .replace(new RegExp('^' + getTagName(tag)), '')
- .replace(/\/?$/, '')
- .trim()
- const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi))
- .map(([trash, key, value]) => [key, value])
- .map(([k, v]) => {
- if (!v) return [k, true]
- return [k, v.substring(1, v.length - 1)]
- })
- return Object.fromEntries(attrs)
-}
diff --git a/src/services/html_converter/utility.service.js b/src/services/html_converter/utility.service.js
new file mode 100644
index 00000000..4d0c36c2
--- /dev/null
+++ b/src/services/html_converter/utility.service.js
@@ -0,0 +1,73 @@
+/**
+ * Extract tag name from tag opener/closer.
+ *
+ * @param {String} tag - tag string, i.e. ''
+ * @return {String} - tagname, i.e. "div"
+ */
+export const getTagName = (tag) => {
+ const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag)
+ return result && (result[1] || result[2])
+}
+
+/**
+ * Extract attributes from tag opener.
+ *
+ * @param {String} tag - tag string, i.e. ''
+ * @return {Object} - map of attributes key = attribute name, value = attribute value
+ * attributes without values represented as boolean true
+ */
+export const getAttrs = tag => {
+ const innertag = tag
+ .substring(1, tag.length - 1)
+ .replace(new RegExp('^' + getTagName(tag)), '')
+ .replace(/\/?$/, '')
+ .trim()
+ const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi))
+ .map(([trash, key, value]) => [key, value])
+ .map(([k, v]) => {
+ if (!v) return [k, true]
+ return [k, v.substring(1, v.length - 1)]
+ })
+ return Object.fromEntries(attrs)
+}
+
+/**
+ * Finds shortcodes in text
+ *
+ * @param {String} text - original text to find emojis in
+ * @param {{ url: String, shortcode: Sring }[]} emoji - list of shortcodes to find
+ * @param {Function} processor - function to call on each encountered emoji,
+ * function is passed single object containing matching emoji ({ url, shortcode })
+ * return value will be inserted into resulting array instead of :shortcode:
+ * @return {Array} resulting array with non-emoji parts of text and whatever {processor}
+ * returned for emoji
+ */
+export const processTextForEmoji = (text, emojis, processor) => {
+ const buffer = []
+ let textBuffer = ''
+ for (let i = 0; i < text.length; i++) {
+ const char = text[i]
+ if (char === ':') {
+ const next = text.slice(i + 1)
+ let found = false
+ for (let emoji of emojis) {
+ if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) {
+ found = emoji
+ break
+ }
+ }
+ if (found) {
+ buffer.push(textBuffer)
+ textBuffer = ''
+ buffer.push(processor(found))
+ i += found.shortcode.length + 1
+ } else {
+ textBuffer += char
+ }
+ } else {
+ textBuffer += char
+ }
+ }
+ if (textBuffer) buffer.push(textBuffer)
+ return buffer
+}
diff --git a/test/unit/specs/components/rich_content.spec.js b/test/unit/specs/components/rich_content.spec.js
new file mode 100644
index 00000000..05c0b259
--- /dev/null
+++ b/test/unit/specs/components/rich_content.spec.js
@@ -0,0 +1,357 @@
+import { shallowMount, createLocalVue } from '@vue/test-utils'
+import RichContent from 'src/components/rich_content/rich_content.jsx'
+
+const localVue = createLocalVue()
+
+const makeMention = (who) => `@${who}`
+const stubMention = (who) => ``
+const lastMentions = (...data) => `${data.join('')}`
+const p = (...data) => `${data.join('')}
`
+const compwrap = (...data) => `${data.join('')}`
+const removedMentionSpan = ''
+
+describe('RichContent', () => {
+ it('renders simple post without exploding', () => {
+ const html = p('Hello world!')
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: true,
+ greentext: true,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(html))
+ })
+
+ it('removes mentions from the beginning of post', () => {
+ const html = p(
+ makeMention('John'),
+ ' how are you doing thoday?'
+ )
+ const expected = p(
+ removedMentionSpan,
+ 'how are you doing thoday?'
+ )
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: true,
+ greentext: true,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(expected))
+ })
+
+ it('removes mentions from the end of the hellpost ()', () => {
+ const html = [
+ p('How are you doing today, fine gentlemen?'),
+ p(
+ makeMention('John'),
+ makeMention('Josh'),
+ makeMention('Jeremy')
+ )
+ ].join('')
+ const expected = [
+ p(
+ 'How are you doing today, fine gentlemen?'
+ ),
+ // TODO fix this extra line somehow?
+ p()
+ ].join('')
+
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: true,
+ greentext: true,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(expected))
+ })
+
+ it('removes mentions from the end of the hellpost (
)', () => {
+ const html = [
+ 'How are you doing today, fine gentlemen?',
+ [
+ makeMention('John'),
+ makeMention('Josh'),
+ makeMention('Jeremy')
+ ].join('')
+ ].join('
')
+ const expected = [
+ 'How are you doing today, fine gentlemen?',
+ // TODO fix this extra line somehow?
+ '
'
+ ].join('')
+
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: true,
+ greentext: true,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(expected))
+ })
+
+ it('removes mentions from the end of the hellpost (\\n)', () => {
+ const html = [
+ 'How are you doing today, fine gentlemen?',
+ [
+ makeMention('John'),
+ makeMention('Josh'),
+ makeMention('Jeremy')
+ ].join('')
+ ].join('\n')
+ const expected = [
+ 'How are you doing today, fine gentlemen?',
+ // TODO fix this extra line somehow?
+ ''
+ ].join('\n')
+
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: true,
+ greentext: true,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(expected))
+ })
+
+ it('Does not remove mentions in the middle or at the end of text string', () => {
+ const html = [
+ [
+ makeMention('Jack'),
+ 'let\'s meet up with ',
+ makeMention('Janet')
+ ].join(''),
+ [
+ 'cc: ',
+ makeMention('John'),
+ makeMention('Josh'),
+ makeMention('Jeremy')
+ ].join('')
+ ].join('\n')
+ const expected = [
+ [
+ removedMentionSpan,
+ 'let\'s meet up with ',
+ stubMention('Janet')
+ ].join(''),
+ [
+ 'cc: ',
+ stubMention('John'),
+ stubMention('Josh'),
+ stubMention('Jeremy')
+ ].join('')
+ ].join('\n')
+
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: true,
+ greentext: true,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(expected))
+ })
+
+ it('removes mentions from the end if there\'s only one first mention', () => {
+ const html = [
+ p(
+ makeMention('Todd'),
+ 'so anyway you are wrong'
+ ),
+ p(
+ makeMention('Tom'),
+ makeMention('Trace'),
+ makeMention('Theodor')
+ )
+ ].join('')
+ const expected = [
+ p(
+ removedMentionSpan,
+ 'so anyway you are wrong'
+ ),
+ // TODO fix this extra line somehow?
+ p()
+ ].join('')
+
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: true,
+ greentext: true,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(expected))
+ })
+
+ it('does not remove mentions from the end if there\'s more than one first mention', () => {
+ const html = [
+ p(
+ makeMention('Zacharie'),
+ makeMention('Zinaide'),
+ 'you guys have cool names, and so do these guys: '
+ ),
+ p(
+ makeMention('Watson'),
+ makeMention('Wallace'),
+ makeMention('Wakamoto')
+ )
+ ].join('')
+ const expected = [
+ p(
+ removedMentionSpan,
+ removedMentionSpan,
+ 'you guys have cool names, and so do these guys: '
+ ),
+ p(
+ lastMentions(
+ stubMention('Watson'),
+ stubMention('Wallace'),
+ stubMention('Wakamoto')
+ )
+ )
+ ].join('')
+
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: true,
+ greentext: true,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(expected))
+ })
+
+ it('Does not touch links if link handling is disabled', () => {
+ const html = [
+ [
+ makeMention('Jack'),
+ 'let\'s meet up with ',
+ makeMention('Janet')
+ ].join(''),
+ [
+ makeMention('John'),
+ makeMention('Josh'),
+ makeMention('Jeremy')
+ ].join('')
+ ].join('\n')
+
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: false,
+ greentext: true,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(html))
+ })
+
+ it('Adds greentext and cyantext to the post', () => {
+ const html = [
+ '>preordering videogames',
+ '>any year'
+ ].join('\n')
+ const expected = [
+ '>preordering videogames',
+ '>any year'
+ ].join('\n')
+
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: false,
+ greentext: true,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(expected))
+ })
+
+ it('Does not add greentext and cyantext if setting is set to false', () => {
+ const html = [
+ '>preordering videogames',
+ '>any year'
+ ].join('\n')
+
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: false,
+ greentext: false,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(html))
+ })
+
+ it('Adds emoji to post', () => {
+ const html = p('Ebin :DDDD :spurdo:')
+ const expected = p(
+ 'Ebin :DDDD ',
+ ''
+ )
+
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: false,
+ greentext: false,
+ emoji: [{ url: 'about:blank', shortcode: 'spurdo' }],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(expected))
+ })
+
+ it('Doesn\'t add nonexistent emoji to post', () => {
+ const html = p('Lol :lol:')
+
+ const wrapper = shallowMount(RichContent, {
+ localVue,
+ propsData: {
+ handleLinks: false,
+ greentext: false,
+ emoji: [],
+ html
+ }
+ })
+
+ expect(wrapper.html()).to.eql(compwrap(html))
+ })
+})
diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js
index 532ea187..9485233f 100644
--- a/test/unit/specs/services/html_converter/html_line_converter.spec.js
+++ b/test/unit/specs/services/html_converter/html_line_converter.spec.js
@@ -2,7 +2,7 @@ import { convertHtmlToLines } from 'src/services/html_converter/html_line_conver
const mapOnlyText = (processor) => (input) => input.text ? processor(input.text) : input
-describe('TinyPostHTMLProcessor', () => {
+describe('html_line_converter', () => {
describe('with processor that keeps original line should not make any changes to HTML when', () => {
const processorKeep = (line) => line
it('fed with regular HTML with newlines', () => {
diff --git a/test/unit/specs/services/html_converter/html_tree_converter.spec.js b/test/unit/specs/services/html_converter/html_tree_converter.spec.js
index a54745c3..7283021b 100644
--- a/test/unit/specs/services/html_converter/html_tree_converter.spec.js
+++ b/test/unit/specs/services/html_converter/html_tree_converter.spec.js
@@ -1,6 +1,6 @@
-import { convertHtmlToTree, processTextForEmoji, getAttrs } from 'src/services/html_converter/html_tree_converter.service.js'
+import { convertHtmlToTree } from 'src/services/html_converter/html_tree_converter.service.js'
-describe('MiniHtmlConverter', () => {
+describe('html_tree_converter', () => {
describe('convertHtmlToTree', () => {
it('converts html into a tree structure', () => {
const input = '1
2
3
45'
@@ -129,38 +129,4 @@ describe('MiniHtmlConverter', () => {
])
})
})
-
- describe('processTextForEmoji', () => {
- it('processes all emoji in text', () => {
- const input = 'Hello from finland! :lol: We have best water! :lmao:'
- const emojis = [
- { shortcode: 'lol', src: 'LOL' },
- { shortcode: 'lmao', src: 'LMAO' }
- ]
- const processor = ({ shortcode, src }) => ({ shortcode, src })
- expect(processTextForEmoji(input, emojis, processor)).to.eql([
- 'Hello from finland! ',
- { shortcode: 'lol', src: 'LOL' },
- ' We have best water! ',
- { shortcode: 'lmao', src: 'LMAO' }
- ])
- })
- it('leaves text as is', () => {
- const input = 'Number one: that\'s terror'
- const emojis = []
- const processor = ({ shortcode, src }) => ({ shortcode, src })
- expect(processTextForEmoji(input, emojis, processor)).to.eql([
- 'Number one: that\'s terror'
- ])
- })
- })
-
- describe('getAttrs', () => {
- it('extracts arguments from tag', () => {
- const input = '
'
- const output = { src: 'boop', cool: true, ebin: 'true' }
-
- expect(getAttrs(input)).to.eql(output)
- })
- })
})
diff --git a/test/unit/specs/services/html_converter/utility.spec.js b/test/unit/specs/services/html_converter/utility.spec.js
new file mode 100644
index 00000000..cf6fd99b
--- /dev/null
+++ b/test/unit/specs/services/html_converter/utility.spec.js
@@ -0,0 +1,37 @@
+import { processTextForEmoji, getAttrs } from 'src/services/html_converter/utility.service.js'
+
+describe('html_converter utility', () => {
+ describe('processTextForEmoji', () => {
+ it('processes all emoji in text', () => {
+ const input = 'Hello from finland! :lol: We have best water! :lmao:'
+ const emojis = [
+ { shortcode: 'lol', src: 'LOL' },
+ { shortcode: 'lmao', src: 'LMAO' }
+ ]
+ const processor = ({ shortcode, src }) => ({ shortcode, src })
+ expect(processTextForEmoji(input, emojis, processor)).to.eql([
+ 'Hello from finland! ',
+ { shortcode: 'lol', src: 'LOL' },
+ ' We have best water! ',
+ { shortcode: 'lmao', src: 'LMAO' }
+ ])
+ })
+ it('leaves text as is', () => {
+ const input = 'Number one: that\'s terror'
+ const emojis = []
+ const processor = ({ shortcode, src }) => ({ shortcode, src })
+ expect(processTextForEmoji(input, emojis, processor)).to.eql([
+ 'Number one: that\'s terror'
+ ])
+ })
+ })
+
+ describe('getAttrs', () => {
+ it('extracts arguments from tag', () => {
+ const input = '
'
+ const output = { src: 'boop', cool: true, ebin: 'true' }
+
+ expect(getAttrs(input)).to.eql(output)
+ })
+ })
+})
--
cgit v1.2.3-70-g09d2
From bebafa1a2c38972245d37de70f4aec4bfb2083fd Mon Sep 17 00:00:00 2001
From: Henry Jameson
Date: Sun, 13 Jun 2021 13:29:26 +0300
Subject: refactored line converter, untied its logic from greentexting, better
handling of broken cases
---
src/components/rich_content/rich_content.jsx | 10 +++-
.../html_converter/html_line_converter.service.js | 55 ++++++++++++++++++----
.../html_converter/html_line_converter.spec.js | 19 ++++++--
3 files changed, 68 insertions(+), 16 deletions(-)
(limited to 'src/services/html_converter')
diff --git a/src/components/rich_content/rich_content.jsx b/src/components/rich_content/rich_content.jsx
index e188763f..328e9201 100644
--- a/src/components/rich_content/rich_content.jsx
+++ b/src/components/rich_content/rich_content.jsx
@@ -246,6 +246,7 @@ const getLinkData = (attrs, children, index) => {
*/
export const preProcessPerLine = (html, greentext, handleLinks) => {
const lastMentions = []
+ const greentextHandle = new Set(['p', 'div'])
let nonEmptyIndex = -1
const newHtml = convertHtmlToLines(html).reverse().map((item, index, array) => {
@@ -256,7 +257,14 @@ export const preProcessPerLine = (html, greentext, handleLinks) => {
nonEmptyIndex += 1
// Greentext stuff
- if (greentext && (string.includes('>') || string.includes('<'))) {
+ if (
+ // Only if greentext is engaged
+ greentext &&
+ // Only handle p's and divs. Don't want to affect blocquotes, code etc
+ item.level.every(l => greentextHandle.has(l)) &&
+ // Only if line begins with '>' or '<'
+ (string.includes('>') || string.includes('<'))
+ ) {
const cleanedString = string.replace(/<[^>]+?>/gi, '') // remove all tags
.replace(/@\w+/gi, '') // remove mentions (even failed ones)
.trim()
diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js
index e448d5cd..f43d162a 100644
--- a/src/services/html_converter/html_line_converter.service.js
+++ b/src/services/html_converter/html_line_converter.service.js
@@ -19,9 +19,42 @@ import { getTagName } from './utility.service.js'
* @return {(string|{ text: string })[]} processed html in form of a list.
*/
export const convertHtmlToLines = (html) => {
- const ignoredTags = new Set(['code', 'blockquote'])
- const handledTags = new Set(['p', 'br', 'div', 'pre', 'code', 'blockquote'])
- const openCloseTags = new Set(['p', 'div', 'pre', 'code', 'blockquote'])
+ // Elements that are implicitly self-closing
+ // https://developer.mozilla.org/en-US/docs/Glossary/empty_element
+ const emptyElements = new Set([
+ 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
+ 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
+ ])
+ // Block-level element (they make a visual line)
+ // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
+ const blockElements = new Set([
+ 'address', 'article', 'aside', 'blockquote', 'details', 'dialog', 'dd',
+ 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form',
+ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'main',
+ 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul'
+ ])
+ // br is very weird in a way that it's technically not block-level, it's
+ // essentially converted to a \n (or \r\n). There's also wbr but it doesn't
+ // guarantee linebreak, only suggest it.
+ const linebreakElements = new Set(['br'])
+
+ const visualLineElements = new Set([
+ ...blockElements.values(),
+ ...linebreakElements.values()
+ ])
+
+ // All block-level elements that aren't empty elements, i.e. not
+ const nonEmptyElements = new Set(visualLineElements)
+ // Difference
+ for (let elem of emptyElements) {
+ nonEmptyElements.delete(elem)
+ }
+
+ // All elements that we are recognizing
+ const allElements = new Set([
+ ...nonEmptyElements.values(),
+ ...emptyElements.values()
+ ])
let buffer = [] // Current output buffer
const level = [] // How deep we are in tags and which tags were there
@@ -29,8 +62,8 @@ export const convertHtmlToLines = (html) => {
let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
- if (textBuffer.trim().length > 0 && !level.some(l => ignoredTags.has(l))) {
- buffer.push({ text: textBuffer })
+ if (textBuffer.trim().length > 0) {
+ buffer.push({ level: [...level], text: textBuffer })
} else {
buffer.push(textBuffer)
}
@@ -49,10 +82,12 @@ export const convertHtmlToLines = (html) => {
}
const handleClose = (tag) => { // handles closing tags
- flush()
- buffer.push(tag)
if (level[0] === getTagName(tag)) {
+ flush()
+ buffer.push(tag)
level.shift()
+ } else { // Broken case
+ textBuffer += tag
}
}
@@ -67,10 +102,10 @@ export const convertHtmlToLines = (html) => {
const tagFull = tagBuffer
tagBuffer = null
const tagName = getTagName(tagFull)
- if (handledTags.has(tagName)) {
- if (tagName === 'br') {
+ if (allElements.has(tagName)) {
+ if (linebreakElements.has(tagName)) {
handleBr(tagFull)
- } else if (openCloseTags.has(tagName)) {
+ } else if (nonEmptyElements.has(tagName)) {
if (tagFull[1] === '/') {
handleClose(tagFull)
} else if (tagFull[tagFull.length - 2] === '/') {
diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js
index 9485233f..c8c89700 100644
--- a/test/unit/specs/services/html_converter/html_line_converter.spec.js
+++ b/test/unit/specs/services/html_converter/html_line_converter.spec.js
@@ -1,8 +1,17 @@
import { convertHtmlToLines } from 'src/services/html_converter/html_line_converter.service.js'
-const mapOnlyText = (processor) => (input) => input.text ? processor(input.text) : input
-
-describe('html_line_converter', () => {
+const greentextHandle = new Set(['p', 'div'])
+const mapOnlyText = (processor) => (input) => {
+ if (input.text && input.level.every(l => greentextHandle.has(l))) {
+ return processor(input.text)
+ } else if (input.text) {
+ return input.text
+ } else {
+ return input
+ }
+}
+
+describe.only('html_line_converter', () => {
describe('with processor that keeps original line should not make any changes to HTML when', () => {
const processorKeep = (line) => line
it('fed with regular HTML with newlines', () => {
@@ -81,7 +90,7 @@ describe('html_line_converter', () => {
it('fed with very broken HTML with broken composition', () => {
const input = '
lmao what whats going on wha
'
- const output = '
_
__
'
+ const output = '_
_
'
const result = convertHtmlToLines(input)
const comparableResult = result.map(mapOnlyText(processorReplace)).join('')
expect(comparableResult).to.eql(output)
@@ -111,7 +120,7 @@ describe('html_line_converter', () => {
expect(comparableResult).to.eql(output)
})
- it('fed with maybe valid HTML? self-closing divs and ps', () => {
+ it('fed with maybe valid HTML? (XHTML) self-closing divs and ps', () => {
const input = 'a
what now
?'
const output = '_
_
_'
const result = convertHtmlToLines(input)
--
cgit v1.2.3-70-g09d2
From 8fe4355a6b84ae81b54228e6749a4ab82966ff2e Mon Sep 17 00:00:00 2001
From: Henry Jameson
Date: Fri, 18 Jun 2021 21:29:47 +0300
Subject: fix rich images
---
src/services/html_converter/html_line_converter.service.js | 2 ++
.../unit/specs/services/html_converter/html_line_converter.spec.js | 7 +++++++
2 files changed, 9 insertions(+)
(limited to 'src/services/html_converter')
diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js
index f43d162a..74103b02 100644
--- a/src/services/html_converter/html_line_converter.service.js
+++ b/src/services/html_converter/html_line_converter.service.js
@@ -114,6 +114,8 @@ export const convertHtmlToLines = (html) => {
} else {
handleOpen(tagFull)
}
+ } else {
+ textBuffer += tagFull
}
} else {
textBuffer += tagFull
diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js
index de7c7fc2..86bd7e8b 100644
--- a/test/unit/specs/services/html_converter/html_line_converter.spec.js
+++ b/test/unit/specs/services/html_converter/html_line_converter.spec.js
@@ -69,6 +69,13 @@ describe('html_line_converter', () => {
const comparableResult = result.map(mapOnlyText(processorKeep)).join('')
expect(comparableResult).to.eql(inputOutput)
})
+
+ it('fed with some recognized but not handled elements', () => {
+ const inputOutput = 'testing images\n\n
'
+ const result = convertHtmlToLines(inputOutput)
+ const comparableResult = result.map(mapOnlyText(processorKeep)).join('')
+ expect(comparableResult).to.eql(inputOutput)
+ })
})
describe('with processor that replaces lines with word "_" should match expected line when', () => {
const processorReplace = (line) => '_'
--
cgit v1.2.3-70-g09d2
From f16658adfc897a3b07ed7f79d872acd2c3837cc8 Mon Sep 17 00:00:00 2001
From: Henry Jameson
Date: Sun, 15 Aug 2021 02:59:14 +0300
Subject: fix tests
---
src/services/html_converter/html_line_converter.service.js | 2 +-
src/services/html_converter/html_tree_converter.service.js | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
(limited to 'src/services/html_converter')
diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js
index 74103b02..5eeaa7cb 100644
--- a/src/services/html_converter/html_line_converter.service.js
+++ b/src/services/html_converter/html_line_converter.service.js
@@ -18,7 +18,7 @@ import { getTagName } from './utility.service.js'
* @param {Object} input - input data
* @return {(string|{ text: string })[]} processed html in form of a list.
*/
-export const convertHtmlToLines = (html) => {
+export const convertHtmlToLines = (html = '') => {
// Elements that are implicitly self-closing
// https://developer.mozilla.org/en-US/docs/Glossary/empty_element
const emptyElements = new Set([
diff --git a/src/services/html_converter/html_tree_converter.service.js b/src/services/html_converter/html_tree_converter.service.js
index 804d35d7..6a8796c4 100644
--- a/src/services/html_converter/html_tree_converter.service.js
+++ b/src/services/html_converter/html_tree_converter.service.js
@@ -19,7 +19,7 @@ import { getTagName } from './utility.service.js'
* @param {Object} input - input data
* @return {string} processed html
*/
-export const convertHtmlToTree = (html) => {
+export const convertHtmlToTree = (html = '') => {
// Elements that are implicitly self-closing
// https://developer.mozilla.org/en-US/docs/Glossary/empty_element
const emptyElements = new Set([
--
cgit v1.2.3-70-g09d2