"""Download episodes of a fixed list of VRT NU series with youtube_dl.

For every series in ``all_vrt`` and every season number in ``range(20)``
the season's ``lists.all-episodes`` page is fetched, episode paths are
scraped from it with a regular expression, and each episode is downloaded
into ``<start dir>/<series>/Season_<n>/<episode>/``.

An episode whose folder already exists is skipped, so the script can be
re-run without fetching the same episode twice.
"""

# The series names can also be collected into a file called ``all_vrt`` with:
#   curl https://www.vrt.be/vrtnu/a-z/ | grep -o /a-z/.*relevant | sort -u | cut -d '.' -f 1 > all_vrt
# 1 can be a lot of numbers, sometimes the first seasons are not available

import os
import re

import requests
import youtube_dl

# Directory the script was started from; all output folders live below it.
startdir = os.getcwd()

ydl_opts = {
    'username': '',
    'password': '',
    # The YoutubeDL API spells this option 'nooverwrites'; 'no-overwrites'
    # is the command-line flag and is not a key the API reads.
    'nooverwrites': True,
}
ydl = youtube_dl.YoutubeDL(ydl_opts)

# Alternative ways to build the series list:
#all_vrt = open('all_vrt').readlines()
#all_vrt = set(re.findall('/a-z/(.*).relevant', requests.get('https://www.vrt.be/vrtnu/a-z/').text))
all_vrt = [
    'bevergem',
    'de-collega-s',
    'dox',
    'duts',
    'fc-de-kampioenen',
    'het-peulengaleis',
    'kamp-waes',
    'kulderzipken',
    'pano',
    'postbus-x',
    'reizen-waes',
    'rick-stein-s-secret-france',
    'stille-waters',
    'team-scheire',
    'the-handmaid-s-tale',
    'tomtesterom',
    'w817',
    'wauters-vs--waes',
    'witse',
    'in-de-ban-van-tsjernobyl',
    'factcheckers',
    'flikken-maastricht',
    'gentbrugge',
    'geub',
    'de-smurfen',
    'deadwind',
    'clinch',
    '100-dagen',
    'belpop-classics',
    'de-blacklist',
    'de-val--10-jaar-na-de-crisis',
    'gevoel-voor-tumor',
]

# Captures everything after 'vrtnu/a-z/' up to the next double quote.
# Compiled once instead of on every season page.
EPISODE_RE = re.compile('vrtnu/a-z/([^"]*)')

# Seconds to wait for a season page; without a timeout requests.get can
# block forever on a stalled connection.
REQUEST_TIMEOUT = 30

for series in all_vrt:
    for season in range(20):
        # curl -v https://www.vrt.be/vrtnu/a-z/het-peulengaleis/3.lists.all-episodes/ | grep -o vrtnu[^\"]*
        # strip() matters when the names come from readlines() (trailing newline).
        url = 'https://www.vrt.be/vrtnu/a-z/%s/%d.lists.all-episodes/' % (series.strip(), season)
        try:
            all_eps = requests.get(url, timeout=REQUEST_TIMEOUT).text
        except requests.RequestException as e:
            # Best effort: one unreachable season page must not abort the run.
            print('could not fetch %s' % url)
            print(e)
            continue

        episodes = set(EPISODE_RE.findall(all_eps))
        print(episodes)

        # The folder name is the second-to-last path component of the match.
        # Matches without a '/' have no such component and are ignored
        # (indexing [-2] on them would raise IndexError).
        folders = {}
        for episode in episodes:
            parts = episode.split('/')
            if len(parts) >= 2:
                folders[episode] = parts[-2]
        print(list(folders.values()))

        if not folders:
            continue

        season_dir = os.path.join(startdir, series, 'Season_%d' % season)
        try:
            os.makedirs(season_dir, exist_ok=True)
        except OSError as e:
            print('could not create folder %s' % season_dir)
            print(e)
            continue

        for episode, folder in folders.items():
            print(episode)
            path = os.path.join(season_dir, folder)
            print(path)

            # An existing episode folder marks the episode as already handled.
            try:
                os.mkdir(path)
            except FileExistsError as e:
                print('skipping existing folder %s' % episode)
                print(e)
                continue
            except OSError as e:
                print('could not create folder %s' % path)
                print(e)
                continue

            try:
                # No output template is configured in ydl_opts, so change into
                # the episode folder before downloading.
                os.chdir(path)
                ydl.download(['https://www.vrt.be/vrtnu/a-z/' + episode])
            except Exception as e:
                # Best effort: report the failure and move on to the next episode.
                print('download failed for %s' % episode)
                print(e)