# Provenance: added as devdocs/vrtdownload.py in commit
# f97350fcf1d992d34b29add468f9104a4fc7d7aa (author: jens, 2020-09-23).
#
# Download VRT NU series with youtube-dl into
#   <startdir>/<series>/Season_<n>/<episode>/
#
# The full list of series slugs can be produced with:
# curl https://www.vrt.be/vrtnu/a-z/ | grep -o /a-z/.*relevant | sort -u | cut -d '.' -f 1 > all_vrt
#
# The season number in the episode-list URL can be a lot of numbers, and
# sometimes the first seasons are not available, so every candidate season
# number below MAX_SEASONS is probed.

import os
import re

import requests
import youtube_dl


# Root URL shared by the episode-list pages and the episode pages.
BASE_URL = 'https://www.vrt.be/vrtnu/a-z/'

# Season numbers 0 .. MAX_SEASONS - 1 are probed for every series.
MAX_SEASONS = 20

# Seconds to wait for the episode-list page before giving up on a season.
REQUEST_TIMEOUT = 30

# Captures everything after 'vrtnu/a-z/' up to the next double quote.
EPISODE_RE = re.compile(r'vrtnu/a-z/([^"]*)')

# Downloads are stored relative to the directory the script was started from.
startdir = os.getcwd()

ydl_opts = {
    'username': '',
    'password': '',
    # The YoutubeDL params key is 'nooverwrites'; 'no-overwrites' is only the
    # command-line flag spelling and is never read by the Python API.
    'nooverwrites': True,
}
ydl = youtube_dl.YoutubeDL(ydl_opts)

#all_vrt = open('all_vrt').readlines()
#all_vrt = set(re.findall('/a-z/(.*).relevant', requests.get('https://www.vrt.be/vrtnu/a-z/').text))
all_vrt = [
    'bevergem', 'de-collega-s', 'dox', 'duts', 'fc-de-kampioenen',
    'het-peulengaleis', 'kamp-waes', 'kulderzipken', 'pano', 'postbus-x',
    'reizen-waes', 'rick-stein-s-secret-france', 'stille-waters',
    'team-scheire', 'the-handmaid-s-tale', 'tomtesterom', 'w817',
    'wauters-vs--waes', 'witse', 'in-de-ban-van-tsjernobyl', 'factcheckers',
    'flikken-maastricht', 'gentbrugge', 'geub', 'de-smurfen', 'deadwind',
    'clinch', '100-dagen', 'belpop-classics', 'de-blacklist',
    'de-val--10-jaar-na-de-crisis', 'gevoel-voor-tumor',
]


def _episode_dir_name(episode):
    """Return the folder name for a matched episode path.

    The name is the second-to-last '/'-separated component of *episode*.
    Returns None when the match has no such component (or it is empty), so
    callers can skip it instead of crashing on an IndexError.
    """
    parts = episode.split('/')
    if len(parts) < 2 or not parts[-2]:
        return None
    return parts[-2]


def _find_episodes(series, season):
    """Return the set of episode paths listed for *series* / *season*.

    A network failure is reported and yields an empty set so that one bad
    request does not abort the whole batch.
    """
    # curl -v https://www.vrt.be/vrtnu/a-z/het-peulengaleis/3.lists.all-episodes/ | grep -o vrtnu[^\"]*
    url = '%s%s/%d.lists.all-episodes/' % (BASE_URL, series.strip(), season)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print('could not fetch %s' % url)
        print(e)
        return set()
    return set(EPISODE_RE.findall(response.text))


def _download_episode(series, season, episode):
    """Download one episode into its own folder, skipping existing folders.

    NOTE: a folder left behind by a failed download is treated as "existing"
    on the next run; remove it by hand to retry that episode.
    """
    name = _episode_dir_name(episode)
    if name is None:
        print('skipping malformed episode path %s' % episode)
        return

    path = os.path.join(startdir, series, 'Season_%d' % season, name)
    print(path)
    try:
        # An already existing folder marks the episode as handled.
        os.mkdir(path)
    except FileExistsError:
        print('skipping existing folder %s' % episode)
        return
    except OSError as e:
        print('could not create folder %s' % path)
        print(e)
        return

    try:
        # No 'outtmpl' is configured, so youtube-dl is expected to write
        # relative to the current working directory.
        os.chdir(path)
        ydl.download([BASE_URL + episode])
    except Exception as e:  # per-episode boundary: report and carry on
        print('download failed for %s' % episode)
        print(e)


def main():
    """Probe every season of every series in all_vrt and download episodes."""
    for series in all_vrt:
        for season in range(MAX_SEASONS):
            episodes = _find_episodes(series, season)
            print(episodes)
            print([_episode_dir_name(x) for x in episodes])
            if not episodes:
                continue

            season_dir = os.path.join(startdir, series, 'Season_%d' % season)
            os.makedirs(season_dir, exist_ok=True)

            for episode in sorted(episodes):
                print(episode)
                _download_episode(series, season, episode)


if __name__ == '__main__':
    main()