From 96a0ad4778da7f30ed5be627f2c10df6d0af3ca8 Mon Sep 17 00:00:00 2001
From: Zenon Mousmoulas <zmousm@noc.grnet.gr>
Date: Sat, 13 Nov 2021 11:50:05 +0200
Subject: [PATCH] MegaTVComEmbedIE: Make canonical URL extraction more robust

---
 youtube_dl/extractor/megatvcom.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/megatvcom.py b/youtube_dl/extractor/megatvcom.py
index 26fbcff4b..46db816d8 100644
--- a/youtube_dl/extractor/megatvcom.py
+++ b/youtube_dl/extractor/megatvcom.py
@@ -166,10 +166,26 @@ class MegaTVComEmbedIE(MegaTVComBaseIE):
                 url = '%s:%s' % (scheme, url)
             yield url
 
+    def _match_canonical_url(self, webpage):  # -> unescaped href of first <link> with rel=canonical and href, else None
+        LINK_RE = r'''(?x)
+        <link(?:
+            rel=(?P<_q1>%(quot_re)s)(?P<canonical>canonical)(?P=_q1)|
+            href=(?P<_q2>%(quot_re)s)(?P<href>(?:(?!(?P=_q2)).)+)(?P=_q2)|
+            [^>]*?
+        )+>
+        ''' % {'quot_re': r'["\']'}  # attribute values may be quoted with " or '
+        for mobj in re.finditer(LINK_RE, webpage):  # one match per <link ...> candidate
+            canonical, href = mobj.group('canonical', 'href')  # None if that alternative never matched
+            if canonical and href:  # both rel=canonical and an href were seen in this match
+                return unescapeHTML(href)  # first qualifying tag wins
+
     def _real_extract(self, url):
         webpage = self._download_webpage(url, 'N/A')
         player_attrs = self._extract_player_attrs(webpage)
-        canonical_url = player_attrs['share_url']
+        canonical_url = player_attrs.get('share_url') or \
+            self._match_canonical_url(webpage)
+        if not canonical_url:
+            raise ExtractorError('canonical URL not found')
         video_id = compat_parse_qs(compat_urllib_parse_urlparse(
             canonical_url).query)['p'][0]