diff options
| author | Shpuld Shpludson <shp@cock.li> | 2020-01-15 16:35:13 +0000 |
|---|---|---|
| committer | Shpuld Shpludson <shp@cock.li> | 2020-01-15 16:35:13 +0000 |
| commit | 3ab128e73924ce34d190ff609cb9b081cdffe402 (patch) | |
| tree | bba013a7d61688b90c1f59a8f9fa6c3323b72a05 /src/services/tiny_post_html_processor | |
| parent | 7c26435e66fd7e142ea4b88fbe51eede32eeb5ce (diff) | |
| parent | 7397636914a9d3e7fd30373034c25175273ab808 (diff) | |
Merge branch 'develop' into 'master'
`master` refresh with `develop`
See merge request pleroma/pleroma-fe!1028
Diffstat (limited to 'src/services/tiny_post_html_processor')
| -rw-r--r-- | src/services/tiny_post_html_processor/tiny_post_html_processor.service.js | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js new file mode 100644 index 00000000..de6f20ef --- /dev/null +++ b/src/services/tiny_post_html_processor/tiny_post_html_processor.service.js @@ -0,0 +1,94 @@ +/** + * This is a tiny purpose-built HTML parser/processor. This basically detects any type of visual newline and + * allows it to be processed, useful for greentexting, mostly + * + * known issue: doesn't handle CDATA so nested CDATA might not work well + * + * @param {Object} input - input data + * @param {(string) => string} processor - function that will be called on every line + * @return {string} processed html + */ +export const processHtml = (html, processor) => { + const handledTags = new Set(['p', 'br', 'div']) + const openCloseTags = new Set(['p', 'div']) + + let buffer = '' // Current output buffer + const level = [] // How deep we are in tags and which tags were there + let textBuffer = '' // Current line content + let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag + + // Extracts tag name from tag, i.e. <span a="b"> => span + const getTagName = (tag) => { + const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag) + return result && (result[1] || result[2]) + } + + const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer + if (textBuffer.trim().length > 0) { + buffer += processor(textBuffer) + } else { + buffer += textBuffer + } + textBuffer = '' + } + + const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing + flush() + buffer += tag + } + + const handleOpen = (tag) => { // handles opening tags + flush() + buffer += tag + level.push(tag) + } + + const handleClose = (tag) => { // handles closing tags + flush() + buffer += tag + if (level[level.length - 1] === tag) { + level.pop() + } + } + + for (let i = 0; i < html.length; i++) { + const char = html[i] + if (char === '<' && tagBuffer === null) { + tagBuffer = char + } else if (char !== '>' && tagBuffer !== null) { + tagBuffer += char + } else if (char === '>' && tagBuffer !== null) { + tagBuffer += char + const tagFull = tagBuffer + tagBuffer = null + const tagName = getTagName(tagFull) + if (handledTags.has(tagName)) { + if (tagName === 'br') { + handleBr(tagFull) + } else if (openCloseTags.has(tagName)) { + if (tagFull[1] === '/') { + handleClose(tagFull) + } else if (tagFull[tagFull.length - 2] === '/') { + // self-closing + handleBr(tagFull) + } else { + handleOpen(tagFull) + } + } + } else { + textBuffer += tagFull + } + } else if (char === '\n') { + handleBr(char) + } else { + textBuffer += char + } + } + if (tagBuffer) { + textBuffer += tagBuffer + } + + flush() + + return buffer +} |
