diff options
| author | Henry Jameson <me@hjkos.com> | 2022-03-16 21:00:20 +0200 |
|---|---|---|
| committer | Henry Jameson <me@hjkos.com> | 2022-03-16 21:00:20 +0200 |
| commit | cd4ad2df11369087b1f39bcaac1bbe258e00580d (patch) | |
| tree | cb2131ccac2501587af7283e6ecd32d3d803e44e /src/services/html_converter/html_line_converter.service.js | |
| parent | 8a9115b58e020f750366e87bb4fd3483d6b62b03 (diff) | |
| parent | b632d740c13ff4e5c98a7a101f26ca60cd2629bb (diff) | |
Merge remote-tracking branch 'origin/develop' into vue3-again
* origin/develop: (475 commits)
Apply 1 suggestion(s) to 1 file(s)
Update dependency @ungap/event-target to v0.2.3
Update package.json
fix broken icons after FA upgrade
Update Font Awesome
Update dependency webpack-dev-middleware to v3.7.3
Update dependency vuelidate to v0.7.7
Pin dependency @kazvmoe-infra/pinch-zoom-element to 1.2.0
lint
Make media modal buttons larger
Add English translation for hide tooltip
Add hide button to media modal
Lint
Prevent hiding media viewer if swiped over SwipeClick
Fix webkit image blurs
Fix video in media modal not displaying properly
Add changelog for https://git.pleroma.social/pleroma/pleroma-fe/-/merge_requests/1403
Remove image box-shadow in media modal
Clean up debug code for image pinch zoom
Bump @kazvmoe-infra/pinch-zoom-element to 1.2.0 on npm
...
Diffstat (limited to 'src/services/html_converter/html_line_converter.service.js')
| -rw-r--r-- | src/services/html_converter/html_line_converter.service.js | 136 |
1 files changed, 136 insertions, 0 deletions
import { getTagName } from './utility.service.js'

// Elements that are implicitly self-closing
// https://developer.mozilla.org/en-US/docs/Glossary/empty_element
const emptyElements = new Set([
  'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
  'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
])

// Block-level elements (they make a visual line)
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
const blockElements = new Set([
  'address', 'article', 'aside', 'blockquote', 'details', 'dialog', 'dd',
  'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form',
  'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'main',
  'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul'
])

// br is very weird in a way that it's technically not block-level, it's
// essentially converted to a \n (or \r\n). There's also wbr but it doesn't
// guarantee a linebreak, it only suggests one.
const linebreakElements = new Set(['br'])

// Everything that starts/ends a visual line: block-level elements plus <br>
const visualLineElements = new Set([
  ...blockElements,
  ...linebreakElements
])

// Visual-line elements that are not empty elements (i.e. without <hr> and
// <br>); only these are tracked as open/close pairs.
const nonEmptyElements = new Set(
  [...visualLineElements].filter(name => !emptyElements.has(name))
)

// All elements that we are recognizing
const allElements = new Set([
  ...nonEmptyElements,
  ...emptyElements
])

/**
 * This is a tiny purpose-built HTML parser/processor. It detects any type of
 * visual newline and converts the entire HTML into an array structure.
 *
 * The resulting array contains, in document order:
 *  - plain strings for recognized line-breaking tags (kept verbatim), for
 *    literal "\n" characters and for whitespace-only (possibly empty) chunks
 *    of text between them;
 *  - objects `{ level, text }` for every chunk that contains non-whitespace
 *    text. `text` is the visual line (inline tags and unrecognized tags are
 *    left inside it verbatim) and `level` is a copy of the stack of currently
 *    open line-breaking tags, innermost first.
 *
 * Intended usage is to process the array with .map() where the map function
 * returns a string for every item; the resulting array can then be converted
 * back to html with a .join('').
 *
 * Generally this isn't very useful except for when you really need to either
 * modify visual lines (greentext i.e. simple quoting) or do something with
 * the first/last line.
 *
 * Tag names are obtained via getTagName() from utility.service.js (presumably
 * it returns the bare element name for both opening and closing tags -
 * confirm against that helper).
 *
 * known issue: doesn't handle CDATA so nested CDATA might not work well
 *
 * @param {string} [html=''] - HTML source to split into visual lines
 * @return {(string|{ level: string[], text: string })[]} processed html in
 *   form of a list.
 */
export const convertHtmlToLines = (html = '') => {
  const buffer = [] // Current output buffer
  const level = [] // How deep we are in tags and which tags were there
  let textBuffer = '' // Current line content
  let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag

  // Processes current line buffer, adds it to output buffer and clears line
  // buffer. Whitespace-only content (including an empty string) is pushed as
  // a plain string so that only real text ends up as a { level, text } object.
  const flush = () => {
    if (textBuffer.trim().length > 0) {
      buffer.push({ level: [...level], text: textBuffer })
    } else {
      buffer.push(textBuffer)
    }
    textBuffer = ''
  }

  // Handles single newlines/linebreaks/self-closing tags
  const handleBr = (tag) => {
    flush()
    buffer.push(tag)
  }

  // Handles opening tags: ends the current line and enters the tag
  const handleOpen = (tag) => {
    flush()
    buffer.push(tag)
    level.unshift(getTagName(tag))
  }

  // Handles closing tags
  const handleClose = (tag) => {
    if (level[0] === getTagName(tag)) {
      flush()
      buffer.push(tag)
      level.shift()
    } else {
      // Broken case: the closing tag doesn't match the innermost open tag,
      // keep it as a part of the text.
      textBuffer += tag
    }
  }

  for (let i = 0; i < html.length; i++) {
    const char = html[i]
    if (char === '<' && tagBuffer === null) {
      // Start reading a tag
      tagBuffer = char
    } else if (char !== '>' && tagBuffer !== null) {
      // Inside of a tag, everything (including "\n" and "<") belongs to it
      tagBuffer += char
    } else if (char === '>' && tagBuffer !== null) {
      // Tag is complete, decide what to do with it
      tagBuffer += char
      const tagFull = tagBuffer
      tagBuffer = null
      const tagName = getTagName(tagFull)
      if (allElements.has(tagName)) {
        if (linebreakElements.has(tagName)) {
          handleBr(tagFull)
        } else if (nonEmptyElements.has(tagName)) {
          if (tagFull[1] === '/') {
            handleClose(tagFull)
          } else if (tagFull[tagFull.length - 2] === '/') {
            // self-closing
            handleBr(tagFull)
          } else {
            handleOpen(tagFull)
          }
        } else {
          // Remaining empty elements (<img>, <hr>, ...) stay inside the line
          textBuffer += tagFull
        }
      } else {
        // Unrecognized (inline) tags stay inside the line
        textBuffer += tagFull
      }
    } else if (char === '\n') {
      handleBr(char)
    } else {
      textBuffer += char
    }
  }

  // Unterminated tag at the end of input is treated as text
  if (tagBuffer) {
    textBuffer += tagBuffer
  }

  flush()

  return buffer
}
