Source code for gtts.tts

# -*- coding: utf-8 -*-
from gtts.tokenizer import pre_processors, Tokenizer, tokenizer_cases
from gtts.utils import _minimize, _len, _clean_tokens, _translate_url
from gtts.lang import tts_langs

from gtts_token import gtts_token
from six.moves import urllib
import urllib3
import requests
import logging

__all__ = ['gTTS', 'gTTSError']

# Logger
log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())


class Speed:
    """Read Speed

    The Google TTS Translate API supports two speeds:
        'slow' <= 0.3 < 'normal'
    """
    SLOW = 0.3
    NORMAL = 1


[docs]class gTTS: """gTTS -- Google Text-to-Speech. An interface to Google Translate's Text-to-Speech API. Args: text (string): The text to be read. tld (string): Top-level domain for the Google Translate host, i.e `https://translate.google.<tld>`. This is useful when ``google.com`` might be blocked within a network but a local or different Google host (e.g. ``google.cn``) is not. Default is ``com``. lang (string, optional): The language (IETF language tag) to read the text in. Default is ``en``. slow (bool, optional): Reads text more slowly. Defaults to ``False``. lang_check (bool, optional): Strictly enforce an existing ``lang``, to catch a language error early. If set to ``True``, a ``ValueError`` is raised if ``lang`` doesn't exist. Setting ``lang_check`` to ``False`` skips Web requests (to validate language) and therefore speeds up instanciation. Default is ``True``. pre_processor_funcs (list): A list of zero or more functions that are called to transform (pre-process) text before tokenizing. Those functions must take a string and return a string. Defaults to:: [ pre_processors.tone_marks, pre_processors.end_of_line, pre_processors.abbreviations, pre_processors.word_sub ] tokenizer_func (callable): A function that takes in a string and returns a list of string (tokens). Defaults to:: Tokenizer([ tokenizer_cases.tone_marks, tokenizer_cases.period_comma, tokenizer_cases.colon, tokenizer_cases.other_punctuation ]).run See Also: :doc:`Pre-processing and tokenizing <tokenizer>` Raises: AssertionError: When ``text`` is ``None`` or empty; when there's nothing left to speak after pre-precessing, tokenizing and cleaning. ValueError: When ``lang_check`` is ``True`` and ``lang`` is not supported. RuntimeError: When ``lang_check`` is ``True`` but there's an error loading the languages dictionnary. """ GOOGLE_TTS_MAX_CHARS = 100 # Max characters the Google TTS API takes at a time GOOGLE_TTS_HEADERS = { "Referer": "http://translate.google.com/", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) " "AppleWebKit/537.36 (KHTML, like Gecko) " "Chrome/47.0.2526.106 Safari/537.36" } def __init__( self, text, tld='com', lang='en', slow=False, lang_check=True, pre_processor_funcs=[ pre_processors.tone_marks, pre_processors.end_of_line, pre_processors.abbreviations, pre_processors.word_sub ], tokenizer_func=Tokenizer([ tokenizer_cases.tone_marks, tokenizer_cases.period_comma, tokenizer_cases.colon, tokenizer_cases.other_punctuation ]).run ): # Debug for k, v in dict(locals()).items(): if k == 'self': continue log.debug("%s: %s", k, v) # Text assert text, 'No text to speak' self.text = text # Translate URL top-level domain self.tld = tld # Language if lang_check: try: langs = tts_langs(self.tld) if lang.lower() not in langs: raise ValueError("Language not supported: %s" % lang) except RuntimeError as e: log.debug(str(e), exc_info=True) log.warning(str(e)) self.lang_check = lang_check self.lang = lang.lower() # Read speed if slow: self.speed = Speed.SLOW else: self.speed = Speed.NORMAL # Pre-processors and tokenizer self.pre_processor_funcs = pre_processor_funcs self.tokenizer_func = tokenizer_func # Google Translate token self.token = gtts_token.Token() def _tokenize(self, text): # Pre-clean text = text.strip() # Apply pre-processors for pp in self.pre_processor_funcs: log.debug("pre-processing: %s", pp) text = pp(text) if _len(text) <= self.GOOGLE_TTS_MAX_CHARS: return _clean_tokens([text]) # Tokenize log.debug("tokenizing: %s", self.tokenizer_func) tokens = self.tokenizer_func(text) # Clean tokens = _clean_tokens(tokens) # Minimize min_tokens = [] for t in tokens: min_tokens += _minimize(t, ' ', self.GOOGLE_TTS_MAX_CHARS) return min_tokens def _prepare_requests(self): """Created the TTS API the request(s) without sending them. Returns: list: ``requests.PreparedRequests_``. <https://2.python-requests.org/en/master/api/#requests.PreparedRequest>`_``. """ # TTS API URL translate_url = _translate_url(tld=self.tld, path="translate_tts") text_parts = self._tokenize(self.text) log.debug("text_parts: %i", len(text_parts)) assert text_parts, 'No text to send to TTS API' prepared_requests = [] for idx, part in enumerate(text_parts): try: # Calculate token part_tk = self.token.calculate_token(part) except requests.exceptions.RequestException as e: # pragma: no cover log.debug(str(e), exc_info=True) raise gTTSError( "Connection error during token calculation: %s" % str(e)) payload = {'ie': 'UTF-8', 'q': part, 'tl': self.lang, 'ttsspeed': self.speed, 'total': len(text_parts), 'idx': idx, 'client': 'tw-ob', 'textlen': _len(part), 'tk': part_tk} log.debug("payload-%i: %s", idx, payload) # Request r = requests.Request(method='GET', url=translate_url, params=payload, headers=self.GOOGLE_TTS_HEADERS) # Prepare request prepared_requests.append(r.prepare()) return prepared_requests
[docs] def get_urls(self): """Get TTS API request URL(s) that would be sent to the TTS API. Returns: list: A list of TTS API request URLs to make. This is particularly useful to get the list of URLs generated by ``gTTS`` but not yet fullfilled, for example to be used by an external program. """ return [pr.url for pr in self._prepare_requests()]
[docs] def write_to_fp(self, fp): """Do the TTS API request(s) and write bytes to a file-like object. Args: fp (file object): Any file-like object to write the ``mp3`` to. Raises: :class:`gTTSError`: When there's an error with the API request. TypeError: When ``fp`` is not a file-like object that takes bytes. """ # When disabling ssl verify in requests (for proxies and firewalls), # urllib3 prints an insecure warning on stdout. We disable that. urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) prepared_requests = self._prepare_requests() for idx, pr in enumerate(prepared_requests): try: with requests.Session() as s: # Send request r = s.send(request=pr, proxies=urllib.request.getproxies(), verify=False) log.debug("headers-%i: %s", idx, r.request.headers) log.debug("url-%i: %s", idx, r.request.url) log.debug("status-%i: %s", idx, r.status_code) r.raise_for_status() except requests.exceptions.HTTPError as e: # pragma: no cover # Request successful, bad response log.debug(str(e)) raise gTTSError(tts=self, response=r) except requests.exceptions.RequestException as e: # pragma: no cover # Request failed log.debug(str(e)) raise gTTSError(tts=self) try: # Write for chunk in r.iter_content(chunk_size=1024): fp.write(chunk) log.debug("part-%i written to %s", idx, fp) except (AttributeError, TypeError) as e: raise TypeError( "'fp' is not a file-like object or it does not take bytes: %s" % str(e))
[docs] def save(self, savefile): """Do the TTS API request and write result to file. Args: savefile (string): The path and file name to save the ``mp3`` to. Raises: :class:`gTTSError`: When there's an error with the API request. """ with open(str(savefile), 'wb') as f: self.write_to_fp(f) log.debug("Saved to %s", savefile)
[docs]class gTTSError(Exception): """Exception that uses context to present a meaningful error message""" def __init__(self, msg=None, **kwargs): self.tts = kwargs.pop('tts', None) self.rsp = kwargs.pop('response', None) if msg: self.msg = msg elif self.tts is not None: self.msg = self.infer_msg(self.tts, self.rsp) else: self.msg = None super(gTTSError, self).__init__(self.msg)
[docs] def infer_msg(self, tts, rsp=None): """Attempt to guess what went wrong by using known information (e.g. http response) and observed behaviour """ cause = "Unknown" if rsp is None: premise = "Failed to connect" if tts.tld != 'com': host = _translate_url(tld=tts.tld) cause = "Host '{}' is not reachable".format(host) else: # rsp should be <requests.Response> # http://docs.python-requests.org/en/master/api/ status = rsp.status_code reason = rsp.reason premise = "{:d} ({}) from TTS API".format(status, reason) if status == 403: cause = "Bad token or upstream API changes" elif status == 404 and not tts.lang_check: cause = "Unsupported language '%s'" % self.tts.lang elif status >= 500: cause = "Uptream API error. Try again later." return "{}. Probable cause: {}".format(premise, cause)