diff options
| author | Henry Jameson <me@hjkos.com> | 2021-06-12 19:47:23 +0300 |
|---|---|---|
| committer | Henry Jameson <me@hjkos.com> | 2021-06-12 19:54:30 +0300 |
| commit | cd4455675024a3dfc8930184114d5f92438d0466 (patch) | |
| tree | 14a11f546ab86fc1f5a2d288344f3ff7c7e246a5 /src/services/html_converter | |
| parent | ca6c7d5b10e48299dcb0ee65248de14f27ed78c8 (diff) | |
restructure and tests
squash! restructure and tests
Diffstat (limited to 'src/services/html_converter')
| -rw-r--r-- | src/services/html_converter/html_line_converter.service.js | 8 | ||||
| -rw-r--r-- | src/services/html_converter/html_tree_converter.service.js | 53 | ||||
| -rw-r--r-- | src/services/html_converter/utility.service.js | 73 |
3 files changed, 77 insertions, 57 deletions
diff --git a/src/services/html_converter/html_line_converter.service.js b/src/services/html_converter/html_line_converter.service.js index d8f5ecb8..e448d5cd 100644 --- a/src/services/html_converter/html_line_converter.service.js +++ b/src/services/html_converter/html_line_converter.service.js @@ -1,3 +1,5 @@ +import { getTagName } from './utility.service.js' + /** * This is a tiny purpose-built HTML parser/processor. This basically detects * any type of visual newline and converts entire HTML into a array structure. @@ -26,12 +28,6 @@ export const convertHtmlToLines = (html) => { let textBuffer = '' // Current line content let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag - // Extracts tag name from tag, i.e. <span a="b"> => span - const getTagName = (tag) => { - const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag) - return result && (result[1] || result[2]) - } - const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer if (textBuffer.trim().length > 0 && !level.some(l => ignoredTags.has(l))) { buffer.push({ text: textBuffer }) diff --git a/src/services/html_converter/html_tree_converter.service.js b/src/services/html_converter/html_tree_converter.service.js index badd473a..804d35d7 100644 --- a/src/services/html_converter/html_tree_converter.service.js +++ b/src/services/html_converter/html_tree_converter.service.js @@ -1,3 +1,5 @@ +import { getTagName } from './utility.service.js' + /** * This is a not-so-tiny purpose-built HTML parser/processor. This parses html * and converts it into a tree structure representing tag openers/closers and @@ -93,54 +95,3 @@ export const convertHtmlToTree = (html) => { flushText() return buffer } - -// Extracts tag name from tag, i.e. 
<span a="b"> => span -export const getTagName = (tag) => { - const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag) - return result && (result[1] || result[2]) -} - -export const processTextForEmoji = (text, emojis, processor) => { - const buffer = [] - let textBuffer = '' - for (let i = 0; i < text.length; i++) { - const char = text[i] - if (char === ':') { - const next = text.slice(i + 1) - let found = false - for (let emoji of emojis) { - if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) { - found = emoji - break - } - } - if (found) { - buffer.push(textBuffer) - textBuffer = '' - buffer.push(processor(found)) - i += found.shortcode.length + 1 - } else { - textBuffer += char - } - } else { - textBuffer += char - } - } - if (textBuffer) buffer.push(textBuffer) - return buffer -} - -export const getAttrs = tag => { - const innertag = tag - .substring(1, tag.length - 1) - .replace(new RegExp('^' + getTagName(tag)), '') - .replace(/\/?$/, '') - .trim() - const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi)) - .map(([trash, key, value]) => [key, value]) - .map(([k, v]) => { - if (!v) return [k, true] - return [k, v.substring(1, v.length - 1)] - }) - return Object.fromEntries(attrs) -} diff --git a/src/services/html_converter/utility.service.js b/src/services/html_converter/utility.service.js new file mode 100644 index 00000000..4d0c36c2 --- /dev/null +++ b/src/services/html_converter/utility.service.js @@ -0,0 +1,73 @@ +/** + * Extract tag name from tag opener/closer. + * + * @param {String} tag - tag string, i.e. '<a href="...">' + * @return {String} - tagname, i.e. "div" + */ +export const getTagName = (tag) => { + const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag) + return result && (result[1] || result[2]) +} + +/** + * Extract attributes from tag opener. + * + * @param {String} tag - tag string, i.e. 
'<a href="...">' + * @return {Object} - map of attributes key = attribute name, value = attribute value + * attributes without values represented as boolean true + */ +export const getAttrs = tag => { + const innertag = tag + .substring(1, tag.length - 1) + .replace(new RegExp('^' + getTagName(tag)), '') + .replace(/\/?$/, '') + .trim() + const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi)) + .map(([trash, key, value]) => [key, value]) + .map(([k, v]) => { + if (!v) return [k, true] + return [k, v.substring(1, v.length - 1)] + }) + return Object.fromEntries(attrs) +} + +/** + * Finds shortcodes in text + * + * @param {String} text - original text to find emojis in + * @param {{ url: String, shortcode: String }[]} emojis - list of shortcodes to find + * @param {Function} processor - function to call on each encountered emoji, + * function is passed single object containing matching emoji ({ url, shortcode }) + * return value will be inserted into resulting array instead of :shortcode: + * @return {Array} resulting array with non-emoji parts of text and whatever {processor} + * returned for emoji + */ +export const processTextForEmoji = (text, emojis, processor) => { + const buffer = [] + let textBuffer = '' + for (let i = 0; i < text.length; i++) { + const char = text[i] + if (char === ':') { + const next = text.slice(i + 1) + let found = false + for (let emoji of emojis) { + if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) { + found = emoji + break + } + } + if (found) { + buffer.push(textBuffer) + textBuffer = '' + buffer.push(processor(found)) + i += found.shortcode.length + 1 + } else { + textBuffer += char + } + } else { + textBuffer += char + } + } + if (textBuffer) buffer.push(textBuffer) + return buffer +} |
