[tagesschau] Restrict playlist entry regex

pmenzel · May 1, 2016 · 68bb2fe
1 parent 854cc54
commit 68bb2fe
Showing 1 changed file with 5 additions and 1 deletion.
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
@@ -200,6 +200,10 @@ class TagesschauIE(InfoExtractor):
     }, {
         'url': 'http://www.tagesschau.de/100sekunden/index.html',
         'only_matching': True,
+    }, {
+        # playlist article with collapsing sections
+        'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
+        'only_matching': True,
     }]
 
     @classmethod
@@ -275,7 +279,7 @@ def _real_extract(self, url):
         if webpage_type == 'website':  # Article
             entries = []
             for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
-                    r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
+                    r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
                     webpage), 1):
                 entries.append({
                     'id': '%s-%d' % (display_id, num),