Coverage for src/mlstatpy/nlp/normalize.py: 100%
6 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-27 05:59 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-27 05:59 +0100
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Text normalization
5"""
6import unicodedata
def remove_diacritics(input_str):
    """
    Strips accents and other diacritical marks from a string and
    returns the ASCII-only result.

    The string is first decomposed with the Unicode *NFKD*
    normalization, then encoded to ASCII while ignoring every
    code point which cannot be encoded. As a consequence, a character
    which has no ASCII decomposition is removed from the
    output and not transliterated.

    @param      input_str       string to clean
    @return                     cleaned string

    Example::

        enguérand --> enguerand

    .. versionadded:: 1.0
    """
    # Decompose first so that an accented letter becomes a base letter
    # followed by its combining mark(s).
    decomposed = unicodedata.normalize('NFKD', input_str)
    # Whatever is not ASCII (combining marks included) is silently
    # dropped by the 'ignore' error handler; the remaining bytes are
    # then turned back into a str.
    return decomposed.encode('ASCII', 'ignore').decode("utf8")