Normalize unicode - toot - Unnamed repository; edit this file 'description' to name the repository.
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 2ecc6a28c6b1cd2efd4bd94d801954e87ab1b320
 (DIR) parent cb1f7b4e61e66ceecf91fe286ac9f44166ef3b25
 (HTM) Author: Ivan Habunek <ivan@habunek.com>
       Date:   Sun, 21 Jan 2018 16:39:40 +0100
       
       Normalize unicode
       
       Diffstat:
         toot/utils.py                       |       5 ++++-
       
       1 file changed, 4 insertions(+), 1 deletion(-)
       ---
 (DIR) diff --git a/toot/utils.py b/toot/utils.py
       @@ -2,6 +2,7 @@
        
        import re
        import socket
       +import unicodedata
        
        from bs4 import BeautifulSoup
        
       @@ -10,7 +11,9 @@ from toot.exceptions import ConsoleError
        
        def get_text(html):
            """Converts html to text, strips all tags."""
       -    return BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
       +    text = BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
       +
       +    return unicodedata.normalize('NFKC', text)
        
        
        def parse_html(html):