# -*- coding: utf-8 -*- """A collection of functions deprecated in requests.utils.""" import re import sys from requests import utils find_charset = re.compile( br']', flags=re.I ).findall find_pragma = re.compile( br']', flags=re.I ).findall find_xml = re.compile( br'^<\?xml.*?encoding=["\']*(.+?)["\'>]' ).findall def get_encodings_from_content(content): """Return encodings from given content string. .. code-block:: python import requests from requests_toolbelt.utils import deprecated r = requests.get(url) encodings = deprecated.get_encodings_from_content(r) :param content: bytestring to extract encodings from :type content: bytes :return: encodings detected in the provided content :rtype: list(str) """ encodings = (find_charset(content) + find_pragma(content) + find_xml(content)) if (3, 0) <= sys.version_info < (4, 0): encodings = [encoding.decode('utf8') for encoding in encodings] return encodings def get_unicode_from_response(response): """Return the requested content back in unicode. This will first attempt to retrieve the encoding from the response headers. If that fails, it will use :func:`requests_toolbelt.utils.deprecated.get_encodings_from_content` to determine encodings from HTML elements. .. code-block:: python import requests from requests_toolbelt.utils import deprecated r = requests.get(url) text = deprecated.get_unicode_from_response(r) :param response: Response object to get unicode content from. :type response: requests.models.Response """ tried_encodings = set() # Try charset from content-type encoding = utils.get_encoding_from_headers(response.headers) if encoding: try: return str(response.content, encoding) except UnicodeError: tried_encodings.add(encoding.lower()) encodings = get_encodings_from_content(response.content) for _encoding in encodings: _encoding = _encoding.lower() if _encoding in tried_encodings: continue try: return str(response.content, _encoding) except UnicodeError: tried_encodings.add(_encoding) # Fall back: if encoding: try: return str(response.content, encoding, errors='replace') except TypeError: pass return response.text