mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-22 16:57:40 +00:00
[utils] Support ttaf1 namespace in TTML
The ttaf1 namespace is used by subtitles on bbc.co.uk. See #6038
This commit is contained in:
parent
607841af64
commit
4e33577173
1 changed file with 7 additions and 4 deletions
|
@ -1841,7 +1841,10 @@ def srt_subtitles_timecode(seconds):
|
||||||
|
|
||||||
|
|
||||||
def dfxp2srt(dfxp_data):
|
def dfxp2srt(dfxp_data):
|
||||||
_x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
|
_x = functools.partial(xpath_with_ns, ns_map={
|
||||||
|
'ttml': 'http://www.w3.org/ns/ttml',
|
||||||
|
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
|
||||||
|
})
|
||||||
|
|
||||||
def parse_node(node):
|
def parse_node(node):
|
||||||
str_or_empty = functools.partial(str_or_none, default='')
|
str_or_empty = functools.partial(str_or_none, default='')
|
||||||
|
@ -1849,9 +1852,9 @@ def dfxp2srt(dfxp_data):
|
||||||
out = str_or_empty(node.text)
|
out = str_or_empty(node.text)
|
||||||
|
|
||||||
for child in node:
|
for child in node:
|
||||||
if child.tag in (_x('ttml:br'), 'br'):
|
if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
|
||||||
out += '\n' + str_or_empty(child.tail)
|
out += '\n' + str_or_empty(child.tail)
|
||||||
elif child.tag in (_x('ttml:span'), 'span'):
|
elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
|
||||||
out += str_or_empty(parse_node(child))
|
out += str_or_empty(parse_node(child))
|
||||||
else:
|
else:
|
||||||
out += str_or_empty(xml.etree.ElementTree.tostring(child))
|
out += str_or_empty(xml.etree.ElementTree.tostring(child))
|
||||||
|
@ -1860,7 +1863,7 @@ def dfxp2srt(dfxp_data):
|
||||||
|
|
||||||
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
|
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
|
||||||
out = []
|
out = []
|
||||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
|
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
|
||||||
|
|
||||||
if not paras:
|
if not paras:
|
||||||
raise ValueError('Invalid dfxp/TTML subtitle')
|
raise ValueError('Invalid dfxp/TTML subtitle')
|
||||||
|
|
Loading…
Reference in a new issue