1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
import { getTagName } from './utility.service.js'
import { unescape } from 'lodash'
/**
* This is a not-so-tiny purpose-built HTML parser/processor. This parses html
* and converts it into a tree structure representing tag openers/closers and
* children.
*
* Structure follows this pattern: [opener, [...children], closer] except root
* node which is just [...children]. Text nodes can only be within children and
* are represented as strings.
*
* Intended use is to convert HTML structure and then recursively iterate over it
* most likely using a map. Very useful for dynamically rendering html replacing
* tags with JSX elements in a render function.
*
* known issue: doesn't handle CDATA so CDATA might not work well
* known issue: doesn't handle HTML comments
*
* @param {Object} input - input data
* @return {string} processed html
*/
export const convertHtmlToTree = (html = '') => {
// Elements that are implicitly self-closing
// https://developer.mozilla.org/en-US/docs/Glossary/empty_element
const emptyElements = new Set([
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
])
// TODO For future - also parse HTML5 multi-source components?
const buffer = [] // Current output buffer
const levels = [['', buffer]] // How deep we are in tags and which tags were there
let textBuffer = '' // Current line content
let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
const getCurrentBuffer = () => {
return levels[levels.length - 1][1]
}
const flushText = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
if (textBuffer === '') return
getCurrentBuffer().push(textBuffer)
textBuffer = ''
}
const handleSelfClosing = (tag) => {
getCurrentBuffer().push([tag])
}
const handleOpen = (tag) => {
const curBuf = getCurrentBuffer()
const newLevel = [unescape(tag), []]
levels.push(newLevel)
curBuf.push(newLevel)
}
const handleClose = (tag) => {
const currentTag = levels[levels.length - 1]
if (getTagName(levels[levels.length - 1][0]) === getTagName(tag)) {
currentTag.push(tag)
levels.pop()
} else {
getCurrentBuffer().push(tag)
}
}
for (let i = 0; i < html.length; i++) {
const char = html[i]
if (char === '<' && tagBuffer === null) {
flushText()
tagBuffer = char
} else if (char !== '>' && tagBuffer !== null) {
tagBuffer += char
} else if (char === '>' && tagBuffer !== null) {
tagBuffer += char
const tagFull = tagBuffer
tagBuffer = null
const tagName = getTagName(tagFull)
if (tagFull[1] === '/') {
handleClose(tagFull)
} else if (emptyElements.has(tagName) || tagFull[tagFull.length - 2] === '/') {
// self-closing
handleSelfClosing(tagFull)
} else {
handleOpen(tagFull)
}
} else {
textBuffer += char
}
}
if (tagBuffer) {
textBuffer += tagBuffer
}
flushText()
return buffer
}
|