# -*- coding: utf-8 -*-
from gtts.tokenizer import pre_processors, Tokenizer, tokenizer_cases
from gtts.utils import _minimize, _len, _clean_tokens, _translate_url
from gtts.lang import tts_langs
from gtts_token import gtts_token
from six.moves import urllib
import urllib3
import requests
import logging
# Names exported by ``from <module> import *``.
__all__ = ['gTTS', 'gTTSError']
# Logger
# Module-level logger named after this module. A NullHandler is attached
# so the logger always has a handler even when the host application has
# not configured logging.
log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())
class Speed:
    """Read speeds sent to the Google TTS Translate API.

    Only two speeds are distinguished: a value at or below ``0.3`` is
    treated as 'slow', anything above it as 'normal'.
    """

    # Sent when the text should be read at the regular pace.
    NORMAL = 1
    # Sent when the text should be read more slowly.
    SLOW = 0.3
class gTTS:
    """gTTS -- Google Text-to-Speech.

    An interface to Google Translate's Text-to-Speech API.

    Args:
        text (string): The text to be read.
        tld (string): Top-level domain for the Google Translate host,
            i.e. `https://translate.google.<tld>`. This is useful
            when ``google.com`` might be blocked within a network but
            a local or different Google host (e.g. ``google.cn``) is not.
            Default is ``com``.
        lang (string, optional): The language (IETF language tag) to
            read the text in. Default is ``en``.
        slow (bool, optional): Reads text more slowly. Defaults to ``False``.
        lang_check (bool, optional): Strictly enforce an existing ``lang``,
            to catch a language error early. If set to ``True``,
            a ``ValueError`` is raised if ``lang`` doesn't exist.
            Setting ``lang_check`` to ``False`` skips Web requests
            (to validate language) and therefore speeds up instantiation.
            Default is ``True``.
        pre_processor_funcs (list): A list of zero or more functions that are
            called to transform (pre-process) text before tokenizing. Those
            functions must take a string and return a string. Defaults to::

                [
                    pre_processors.tone_marks,
                    pre_processors.end_of_line,
                    pre_processors.abbreviations,
                    pre_processors.word_sub
                ]

        tokenizer_func (callable): A function that takes in a string and
            returns a list of string (tokens). Defaults to::

                Tokenizer([
                    tokenizer_cases.tone_marks,
                    tokenizer_cases.period_comma,
                    tokenizer_cases.colon,
                    tokenizer_cases.other_punctuation
                ]).run

    See Also:
        :doc:`Pre-processing and tokenizing <tokenizer>`

    Raises:
        AssertionError: When ``text`` is ``None`` or empty. Also raised
            later, when the requests are prepared, if there's nothing
            left to speak after pre-processing, tokenizing and cleaning.
        ValueError: When ``lang_check`` is ``True`` and ``lang`` is not
            supported.

    Note:
        When ``lang_check`` is ``True`` and loading the languages
        dictionary fails with a ``RuntimeError``, the error is logged as a
        warning and construction continues without validating ``lang``.
    """

    GOOGLE_TTS_MAX_CHARS = 100  # Max characters the Google TTS API takes at a time
    # Headers attached to every TTS API request.
    GOOGLE_TTS_HEADERS = {
        "Referer": "http://translate.google.com/",
        "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; WOW64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/47.0.2526.106 Safari/537.36"
    }

    def __init__(
            self,
            text,
            tld='com',
            lang='en',
            slow=False,
            lang_check=True,
            pre_processor_funcs=[
                pre_processors.tone_marks,
                pre_processors.end_of_line,
                pre_processors.abbreviations,
                pre_processors.word_sub
            ],
            tokenizer_func=Tokenizer([
                tokenizer_cases.tone_marks,
                tokenizer_cases.period_comma,
                tokenizer_cases.colon,
                tokenizer_cases.other_punctuation
            ]).run
    ):
        # Debug: must stay the first statement so that ``locals()`` holds
        # only the constructor arguments.
        for k, v in dict(locals()).items():
            if k == 'self':
                continue
            log.debug("%s: %s", k, v)

        # Text
        assert text, 'No text to speak'
        self.text = text

        # Translate URL top-level domain
        self.tld = tld

        # Language
        if lang_check:
            try:
                langs = tts_langs(self.tld)
                if lang.lower() not in langs:
                    raise ValueError("Language not supported: %s" % lang)
            except RuntimeError as e:
                # Best effort: failing to load the languages must not
                # prevent construction, so only log it.
                log.debug(str(e), exc_info=True)
                log.warning(str(e))

        self.lang_check = lang_check
        self.lang = lang.lower()

        # Read speed
        if slow:
            self.speed = Speed.SLOW
        else:
            self.speed = Speed.NORMAL

        # Pre-processors and tokenizer
        # Copy the list: the default is a single list object shared by
        # every call, so storing it directly would let a mutation made
        # through one instance leak into all later instances.
        self.pre_processor_funcs = list(pre_processor_funcs)
        self.tokenizer_func = tokenizer_func

        # Google Translate token
        self.token = gtts_token.Token()

    def _tokenize(self, text):
        """Pre-process ``text`` and split it into parts for the TTS API.

        Args:
            text (string): The raw text to be read.

        Returns:
            list: The string tokens to send, one per API request.
        """
        # Pre-clean
        text = text.strip()

        # Apply pre-processors
        for pp in self.pre_processor_funcs:
            log.debug("pre-processing: %s", pp)
            text = pp(text)

        # Short enough to be sent as a single part: skip tokenizing.
        if _len(text) <= self.GOOGLE_TTS_MAX_CHARS:
            return _clean_tokens([text])

        # Tokenize
        log.debug("tokenizing: %s", self.tokenizer_func)
        tokens = self.tokenizer_func(text)

        # Clean
        tokens = _clean_tokens(tokens)

        # Minimize
        # NOTE(review): presumably `_minimize` breaks a token that exceeds
        # the character limit into smaller pieces on spaces -- confirm
        # against gtts.utils.
        min_tokens = []
        for t in tokens:
            min_tokens += _minimize(t, ' ', self.GOOGLE_TTS_MAX_CHARS)

        # Filter empty tokens, post-minimize. Return the filtered list,
        # not ``min_tokens``, so empty parts are never sent to the API.
        return [t for t in min_tokens if t]

    def _prepare_requests(self):
        """Create the TTS API request(s) without sending them.

        Returns:
            list: ``requests.PreparedRequest`` objects, one per text part.

        Raises:
            AssertionError: When tokenizing leaves no text to send.
            gTTSError: When a connection error occurs during token
                calculation.
        """
        # TTS API URL
        translate_url = _translate_url(tld=self.tld, path="translate_tts")

        text_parts = self._tokenize(self.text)
        log.debug("text_parts: %i", len(text_parts))
        assert text_parts, 'No text to send to TTS API'

        prepared_requests = []
        for idx, part in enumerate(text_parts):
            try:
                # Calculate token
                part_tk = self.token.calculate_token(part)
            except requests.exceptions.RequestException as e:  # pragma: no cover
                log.debug(str(e), exc_info=True)
                raise gTTSError(
                    "Connection error during token calculation: %s" %
                    str(e))

            payload = {'ie': 'UTF-8',
                       'q': part,
                       'tl': self.lang,
                       'ttsspeed': self.speed,
                       'total': len(text_parts),
                       'idx': idx,
                       'client': 'tw-ob',
                       'textlen': _len(part),
                       'tk': part_tk}

            log.debug("payload-%i: %s", idx, payload)

            # Request
            r = requests.Request(method='GET',
                                 url=translate_url,
                                 params=payload,
                                 headers=self.GOOGLE_TTS_HEADERS)

            # Prepare request
            prepared_requests.append(r.prepare())

        return prepared_requests

    def get_urls(self):
        """Get TTS API request URL(s) that would be sent to the TTS API.

        Returns:
            list: A list of TTS API request URLs to make.

        This is particularly useful to get the list of URLs generated
        by ``gTTS`` but not yet fulfilled,
        for example to be used by an external program.
        """
        return [pr.url for pr in self._prepare_requests()]

    def write_to_fp(self, fp):
        """Do the TTS API request(s) and write bytes to a file-like object.

        Args:
            fp (file object): Any file-like object to write the ``mp3`` to.

        Raises:
            :class:`gTTSError`: When there's an error with the API request.
            TypeError: When ``fp`` is not a file-like object that takes bytes.
        """
        # When disabling ssl verify in requests (for proxies and firewalls),
        # urllib3 prints an insecure warning on stdout. We disable that.
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        prepared_requests = self._prepare_requests()
        for idx, pr in enumerate(prepared_requests):
            try:
                with requests.Session() as s:
                    # Send request
                    # NOTE: certificate verification is deliberately off
                    # here (see the comment at the top of this method).
                    r = s.send(request=pr,
                               proxies=urllib.request.getproxies(),
                               verify=False)

                log.debug("headers-%i: %s", idx, r.request.headers)
                log.debug("url-%i: %s", idx, r.request.url)
                log.debug("status-%i: %s", idx, r.status_code)

                r.raise_for_status()
            except requests.exceptions.HTTPError as e:  # pragma: no cover
                # Request successful, bad response
                log.debug(str(e))
                raise gTTSError(tts=self, response=r)
            except requests.exceptions.RequestException as e:  # pragma: no cover
                # Request failed
                log.debug(str(e))
                raise gTTSError(tts=self)

            try:
                # Write
                for chunk in r.iter_content(chunk_size=1024):
                    fp.write(chunk)
                log.debug("part-%i written to %s", idx, fp)
            except (AttributeError, TypeError) as e:
                raise TypeError(
                    "'fp' is not a file-like object or it does not take bytes: %s" %
                    str(e))

    def save(self, savefile):
        """Do the TTS API request and write result to file.

        Args:
            savefile (string): The path and file name to save the ``mp3`` to.

        Raises:
            :class:`gTTSError`: When there's an error with the API request.
        """
        with open(str(savefile), 'wb') as f:
            self.write_to_fp(f)
            log.debug("Saved to %s", savefile)
class gTTSError(Exception):
    """Exception that uses context to present a meaningful error message.

    Args:
        msg (string, optional): Explicit error message. When given (and
            not empty) it is used as-is.
        tts (optional, keyword): The ``gTTS`` instance that failed. When
            ``msg`` is not given, the message is inferred from it.
        response (optional, keyword): The HTTP response received from the
            TTS API, if any; used to refine the inferred message.

    Attributes set on the instance: ``tts``, ``rsp`` (the response) and
    ``msg`` (``None`` when neither ``msg`` nor ``tts`` was supplied).
    """

    def __init__(self, msg=None, **kwargs):
        self.tts = kwargs.pop('tts', None)
        self.rsp = kwargs.pop('response', None)
        if msg:
            self.msg = msg
        elif self.tts is not None:
            self.msg = self.infer_msg(self.tts, self.rsp)
        else:
            self.msg = None
        super(gTTSError, self).__init__(self.msg)

    def infer_msg(self, tts, rsp=None):
        """Attempt to guess what went wrong by using known
        information (e.g. http response) and observed behaviour.

        Args:
            tts: Object exposing ``tld``, ``lang`` and ``lang_check``.
            rsp (optional): Response exposing ``status_code`` and
                ``reason``; ``None`` means no response was received.

        Returns:
            string: ``"<premise>. Probable cause: <cause>"``.
        """
        cause = "Unknown"

        if rsp is None:
            premise = "Failed to connect"

            if tts.tld != 'com':
                host = _translate_url(tld=tts.tld)
                cause = "Host '{}' is not reachable".format(host)

        else:
            # rsp should be <requests.Response>
            # http://docs.python-requests.org/en/master/api/
            status = rsp.status_code
            reason = rsp.reason

            premise = "{:d} ({}) from TTS API".format(status, reason)

            if status == 403:
                cause = "Bad token or upstream API changes"
            elif status == 404 and not tts.lang_check:
                # Use the ``tts`` argument rather than ``self.tts`` so the
                # method also works on an instance created without ``tts``.
                cause = "Unsupported language '%s'" % tts.lang
            elif status >= 500:
                cause = "Upstream API error. Try again later."

        return "{}. Probable cause: {}".format(premise, cause)