# Provenance: commit 2580e02140f112e8797807620e36712489039754 ("init"),
# authored by Tianhao Wang on Sat, 14 Oct 2023 00:04:45 +0200; the commit
# created utils.py with the definitions below.
import html
import re

# Matches one HTML comment or one tag.
#   * The comment alternative comes first so that a '>' inside a comment
#     (e.g. "<!-- a > b -->") does not end the match early.
#   * "[^>]*" (unlike ".") also matches newlines, so tags wrapped across
#     several lines are removed as a whole; re.DOTALL does the same for the
#     comment alternative.
CLEANR = re.compile(r'<!--.*?-->|<[^>]*>', re.DOTALL)


def sanitize_html(raw_html: str) -> str:
    """Strip HTML tags and comments from *raw_html* and decode its entities.

    Tags are removed first and character references (``&amp;``, ``&lt;``,
    ``&#39;`` ...) are decoded afterwards, so escaped markup such as
    ``&lt;b&gt;`` comes back as the literal text ``<b>``.  The result is
    therefore plain text: it must be escaped again before being embedded
    in an HTML document.

    Args:
        raw_html: Text that may contain HTML markup.

    Returns:
        The text with every tag/comment removed and entities unescaped.

    Raises:
        TypeError: If *raw_html* is not a string (raised by ``re``).
    """
    cleantext = CLEANR.sub('', raw_html)
    return html.unescape(cleantext)