diff options
| author | Tianhao Wang <tianhao.wang2@mailbox.tu-dresden.de> | 2023-10-14 00:04:45 +0200 |
|---|---|---|
| committer | Tianhao Wang <tianhao.wang2@mailbox.tu-dresden.de> | 2023-10-14 00:04:45 +0200 |
| commit | 2580e02140f112e8797807620e36712489039754 (patch) | |
| tree | 4956d918ecb3d79ff772c52576bfdee20e88a811 /utils.py | |
init
Diffstat (limited to 'utils.py')
| -rw-r--r-- | utils.py | 9 |
1 file changed, 9 insertions, 0 deletions
"""Helpers for reducing HTML markup to plain text."""
# Provenance: diff --git a/utils.py b/utils.py (new file, index 0000000..aa0f4ed)

import re
import html

# Matches anything shaped like a tag: '<', then every character up to the
# nearest '>'.  The negated class (unlike '.') also matches newlines, so tags
# and comments that wrap across lines are removed too.
CLEANR = re.compile(r'<[^>]*>')


def sanitize_html(raw_html: str) -> str:
    """Return *raw_html* with tags removed and character references decoded.

    Tags are stripped first and entities decoded afterwards, so text that was
    entity-encoded in the input (e.g. ``&lt;b&gt;``) survives as literal
    characters instead of being mistaken for markup.

    Args:
        raw_html: HTML fragment to clean.

    Returns:
        The text with every ``<...>`` span deleted and HTML character
        references (``&amp;``, ``&#39;``, ...) replaced by their characters.

    Raises:
        TypeError: If *raw_html* is not a string.
    """
    without_tags = CLEANR.sub('', raw_html)
    return html.unescape(without_tags)
