359 lines
15 KiB
Swift
359 lines
15 KiB
Swift
//
|
|
// VrtNuLayout.swift
|
|
// vrtnu website scraping (not GUI layout, but the layout of the scraped website)
|
|
// The VRTNu object is built up as:
|
|
// VRTNu -> Shows -> Seasons -> Episodes
|
|
// An episode has a reference to its season, a season has a reference to its show and a show has a reference to the VRTNu main struct
|
|
// Most of this is implemented as lazy vars to only parse the webpages that the user really wants
|
|
// We use a global `just` object so that there is a single session and we remain logged in across all our requests
|
|
//
|
|
// Created by Jens Timmerman on 25/09/2020.
|
|
//
|
|
|
|
import Foundation
|
|
|
|
// Shared HTTP client used by the login, token and video-metadata requests in this file.
// NOTE(review): presumably this is what keeps the login cookies between requests — confirm against the Just library.
let just = JustOf<HTTP>()
|
|
|
|
/// A single episode of a season.
///
/// The episode page URL is derived from the show name, the season name and the
/// episode name. Video metadata is only requested when `authenticated()`,
/// `video` or `getVideoJson()` is used.
struct Episode: Hashable, Comparable {

    /// Orders episodes by `name` using `localizedStandardCompare`.
    static func < (lhs: Episode, rhs: Episode) -> Bool {
        return lhs.name.localizedStandardCompare(rhs.name) == .orderedAscending
    }

    /// Episode name as used in the episode URL.
    let name: String
    /// Title passed in by the creator of the episode.
    let title: String
    /// Season this episode belongs to.
    let season: Season
    /// Image URL passed in by the creator of the episode.
    let imageURL: URL
    /// URL of the episode page on vrt.be.
    let episodeurl: String

    /// Checks whether the video metadata of this episode can be requested.
    ///
    /// - Returns: `false` when the metadata could not be loaded at all or when
    ///   the service answers with the code `AUTHENTICATION_REQUIRED`; `true`
    ///   in every other case.
    func authenticated() -> Bool {
        // check if current token is valid
        guard let videojson = fetchVideoJson() else {
            print("not authenticated")
            return false
        }
        print(videojson)
        if let code = videojson.object(forKey: "code") {
            if code as? String == "AUTHENTICATION_REQUIRED" {
                print("not authenticated")
                return false
            }
            print("maybe vrt authentication logic changed? please report this")
        }
        print("authenticated")
        return true
    }

    /// Playable video of this episode, built from the video metadata.
    ///
    /// The type cannot express failure, so missing metadata still stops the
    /// program, but with a message that names the problem.
    lazy var video: Video = {
        let videojson = self.getVideoJson()
        print(videojson)

        guard let duration = videojson.value(forKey: "duration") as? Double,
              let title = videojson.value(forKey: "title") as? String,
              let targetURLs = videojson.value(forKey: "targetUrls") as? [NSDictionary],
              !targetURLs.isEmpty else {
            fatalError("video metadata for " + self.episodeurl + " has no duration, title or targetUrls")
        }

        // it seems that the hls_aes stream has more chance of playing
        // (the last hls_aes entry wins, otherwise the first stream is used)
        // TODO: pass all streams and switch stream if one fails?
        let preferredStream = targetURLs.last(where: { ($0.value(forKey: "type") as? String) == "hls_aes" })
            ?? targetURLs[0]

        guard let videourl = preferredStream.value(forKey: "url") as? String,
              let hlsUrl = URL(string: videourl) else {
            fatalError("video metadata for " + self.episodeurl + " has no usable stream url")
        }
        print(videourl)

        return Video(hlsUrl: hlsUrl,
                     title: title,
                     duration: duration)
    }()

    /// Creates an episode and derives its page URL from `season` and `episodeName`.
    init(season: Season, episodeName: String, title: String, imageURL: URL) {
        self.name = episodeName
        self.season = season
        self.imageURL = imageURL
        self.episodeurl = "https://www.vrt.be/vrtnu/a-z/" + season.show.showName + "/" + season.seasonName + "/" + episodeName
        self.title = title
    }

    /// Returns `video` without requiring a mutable episode.
    func getVideo() -> Video {
        // `video` is lazy and therefore mutating; evaluate it on a local copy.
        var lazyself = self
        return lazyself.video
    }

    /// Requests the video metadata of this episode.
    ///
    /// - Returns: the decoded JSON object, or an empty dictionary when the
    ///   episode page or the metadata could not be loaded.
    func getVideoJson() -> NSDictionary {
        return fetchVideoJson() ?? NSDictionary()
    }

    /// Loads the episode page, extracts the video and publication ids and
    /// requests the video metadata; returns `nil` when any step fails.
    private func fetchVideoJson() -> NSDictionary? {
        guard let episodedata = just.get(episodeurl).text else {
            print("could not load episode page " + episodeurl)
            return nil
        }

        // The ids are attributes of the <nui-media> element on the page, e.g.
        // videoid="vid-4a3a3cff-0bae-4884-9507-9590472c3bf7"
        // publicationid="pbs-pub-b5ee97f5-f272-455f-ba9b-d3d7b179839d"
        guard let videoid = parseData(data: episodedata, regexPattern: "vid-[a-z0-9-]*").first,
              let pubid = parseData(data: episodedata, regexPattern: "pbs-pub-[a-z0-9-]*").first else {
            print("no video id found on " + episodeurl)
            return nil
        }
        let videoidd = pubid + "$" + videoid
        print(videoidd)

        let token = self.season.show.vrtNu.getToken()
        guard !token.isEmpty else {
            print("no player token available")
            return nil
        }

        let clientid = "vrtvideo"
        // The url carries the player token, so it is deliberately not printed.
        let mediaurl = "https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1/videos/"
            + videoidd
            + "?vrtPlayerToken=" + token
            + "&client=" + clientid + "@PROD"

        guard let videojson = just.get(mediaurl).json as? NSDictionary else {
            print("could not load video metadata for " + videoidd)
            return nil
        }
        return videojson
    }

}
|
|
|
|
/// Returns every substring of `data` that matches `regexPattern`, in order of appearance.
///
/// - Parameters:
///   - data: text to search.
///   - regexPattern: `NSRegularExpression` pattern.
/// - Returns: the full text of each match (not the capture groups); an empty
///   array when nothing matches or when the pattern is not a valid expression.
func parseData(data: String, regexPattern: String) -> [String] {
    guard let regex = try? NSRegularExpression(pattern: regexPattern) else {
        print("invalid regex pattern: " + regexPattern)
        return []
    }
    // NSRange counts UTF-16 units. `data.count` counts Characters, which is
    // smaller as soon as the text contains emoji or other multi-unit
    // characters, and would cut the end of the text off from the search.
    let range = NSRange(data.startIndex ..< data.endIndex, in: data)
    let nsString = data as NSString
    let output = regex.matches(in: data, range: range).map { nsString.substring(with: $0.range) }
    print(output)
    return output
}
|
|
|
|
/// A season of a show. Two seasons are the same when their `seasonURL` is the same.
struct Season: Hashable, Comparable {

    /// Orders seasons by `seasonName` using `localizedStandardCompare`.
    static func < (lhs: Season, rhs: Season) -> Bool {
        return lhs.seasonName.localizedStandardCompare(rhs.seasonName) == .orderedAscending
    }

    /// Seasons are equal when they point to the same season page.
    static func == (lhs: Season, rhs: Season) -> Bool {
        return lhs.seasonURL == rhs.seasonURL
    }

    /// Hashes exactly what `==` compares, so equal seasons always hash equally.
    func hash(into hasher: inout Hasher) {
        hasher.combine(seasonURL)
    }

    /// Title passed in by the creator of the season.
    let title: String
    /// Show this season belongs to.
    let show: Show
    /// URL of the season page on vrt.be.
    let seasonURL: URL
    /// Season name as used in the season URL.
    let seasonName: String

    /// Creates a season and derives its page URL from `show` and `seasonName`.
    init(show: Show, seasonName: String, title: String) {
        self.show = show
        self.title = title
        self.seasonName = seasonName
        print(seasonName)
        print(show.showName)
        self.seasonURL = URL(string: "https://www.vrt.be/vrtnu/a-z/" + show.showName + "/" + seasonName)!
    }

    /// Downloads the all-episodes list of this season and turns it into episodes.
    ///
    /// - Returns: the episodes sorted by name; an empty array when the list
    ///   could not be downloaded. Entries without a usable image URL are skipped.
    func getEpisodes() -> [Episode] {
        //`set(re.findall('vrtnu/a-z/%s/%s/([^"]*)/' %(show, season),requests.get('https://www.vrt.be/vrtnu/a-z/%s/%s.lists.all-episodes/' % (show, season)).text))`

        print("getting episodes for " + show.showName + " " + seasonName)
        let episodePrefix = "vrtnu/a-z/" + show.showName + "/" + seasonName + "/"
        // The names come from scraped pages: escape them so they are matched literally.
        let regexPattern = "vrtnu/a-z/" + NSRegularExpression.escapedPattern(for: show.showName)
            + "/" + NSRegularExpression.escapedPattern(for: seasonName) + "/([^\"]*)/"
        let imageregexPattern = "data-responsive-image=\".*(jpg|png)"
        let titleregexPattern = "\">(.*)(</a>|<br />)"

        guard let data = Just.get("https://www.vrt.be/vrtnu/a-z/" + show.showName + "/" + seasonName + ".lists.all-episodes/").text else {
            print("could not load episodes for " + show.showName + " " + seasonName)
            return []
        }
        let output = parseData(data: data, regexPattern: regexPattern)
        let imageoutput = parseData(data: data, regexPattern: imageregexPattern)
        let titleoutput = parseData(data: data, regexPattern: titleregexPattern)
        print(data)

        // output always contains same url twice, so episode i lives at output[i * 2].
        // Only walk as far as all three lists have an entry for the episode.
        let episodeCount = min(titleoutput.count, imageoutput.count, (output.count + 1) / 2)
        if episodeCount != titleoutput.count {
            print("episode list is inconsistent, using the first \(episodeCount) entries")
        }

        var myepisodes: [Episode] = []
        for i in 0 ..< episodeCount {
            let episode = output[i * 2]
                .replacingOccurrences(of: episodePrefix, with: "")
                .replacingOccurrences(of: "/", with: "")
            print(episode)

            // Strip any scheme and the attribute name, then force https.
            let imageString = imageoutput[i]
                .replacingOccurrences(of: "https:", with: "")
                .replacingOccurrences(of: "http:", with: "")
                .replacingOccurrences(of: "data-responsive-image=\"", with: "https:")
            guard let image = URL(string: imageString) else {
                print("skipping " + episode + ": invalid image url")
                continue
            }

            let title = titleoutput[i]
                .replacingOccurrences(of: "\">", with: "")
                .replacingOccurrences(of: "<br />", with: "")
                .replacingOccurrences(of: "</a>", with: "")
            myepisodes.append(Episode(season: self, episodeName: episode, title: title, imageURL: image))
        }

        myepisodes.sort()
        return myepisodes
    }

}
|
|
|
|
/// A show on VRT NU. Two shows are the same when their `showURL` is the same.
struct Show: Hashable, Comparable {

    /// Orders shows by `showName` using `localizedStandardCompare`.
    static func < (lhs: Show, rhs: Show) -> Bool {
        return lhs.showName.localizedStandardCompare(rhs.showName) == .orderedAscending
    }

    /// Shows are equal when they point to the same show page.
    static func == (lhs: Show, rhs: Show) -> Bool {
        return lhs.showURL == rhs.showURL
    }

    /// Hashes exactly what `==` compares, so equal shows always hash equally.
    func hash(into hasher: inout Hasher) {
        hasher.combine(showURL)
    }

    /// Show name as used in the show URL.
    let showName: String
    /// URL of the show page on vrt.be.
    let showURL: URL
    /// Title passed in by the creator of the show.
    let title: String
    /// Image URL passed in by the creator of the show.
    let imageURL: URL
    /// The VRTNu value this show was created from.
    let vrtNu: VRTNu

    /// Creates a show and derives its page URL from `showName`.
    init(vrtNu: VRTNu, showName: String, title: String, imageURL: URL) {
        self.showURL = URL(string: "https://www.vrt.be/vrtnu/a-z/" + showName + "/")!
        self.showName = showName
        self.title = title
        //TODO: get image urls for shows
        self.imageURL = imageURL
        self.vrtNu = vrtNu
    }

    /* func getEpisodes()-> [Episode]{
    # actually all episodes are already listed in the show's page, no need to get the individual episodes
    print("getting episodes for" + showName)
    let regexPattern = "link=\"/vrtnu/a-z/" + showName + "/(.*).html\">"
    let seasondata = Just.get(showURL).text!
    let output = parseData(data: seasondata, regexPattern: regexPattern)

    let imageregexPattern = "data-responsive-image=\".*(jpg|png)"
    let titleregexPattern = "\">(.*)(</a>|<br />)"

    let data = Just.get("https://www.vrt.be/vrtnu/a-z/" + show.showName + "/" + seasonName + ".lists.all-episodes/").text!
    let output = parseData(data: data, regexPattern: regexPattern)
    let imageoutput = parseData(data: data, regexPattern: imageregexPattern)
    let titleoutput = parseData(data: data, regexPattern: titleregexPattern)
    print(data)

    var episode: String
    var myepisodes: [Episode]
    myepisodes = []
    for i in 0 ..< titleoutput.count{
    // output always contains same url twice
    episode = output[i * 2].replacingOccurrences(of: "vrtnu/a-z/" + show.showName + "/" + seasonName + "/", with: "").replacingOccurrences(of: "/", with: "")
    print(episode)
    print(seasonName)
    let image = URL(string: imageoutput[i].replacingOccurrences(of: "https:", with: "").replacingOccurrences(of: "http:", with: "").replacingOccurrences(of: "data-responsive-image=\"", with: "https:"))!
    let title = titleoutput[i].replacingOccurrences(of: "\">", with: "").replacingOccurrences(of: "<br />", with: "").replacingOccurrences(of: "</a>", with: "")
    myepisodes.append(Episode(season: self, episodeName: episode, title: title, imageURL: image))
    }

    myepisodes.sort()
    return myepisodes
    }

    }*/

    /// Downloads the show page and extracts its seasons.
    ///
    /// - Returns: the seasons sorted by name; an empty array when the show
    ///   page could not be downloaded.
    func getSeasons() -> [Season] {
        //`re.findall('value="#parsys_container_banner_%s_(.*)">' % show, requests.get('https://www.vrt.be/vrtnu/a-z/%s/' % show).text)`
        // The show name comes from a scraped page: escape it so it is matched literally.
        let regexPattern = "value=\"#parsys_container_banner_("
            + NSRegularExpression.escapedPattern(for: showName) + "_)?(.*)\">"
        print("getting seasons from " + showURL.absoluteString)
        print("filtering with " + regexPattern)

        guard let page = Just.get(showURL).text else {
            print("could not load " + showURL.absoluteString)
            return []
        }
        let output = parseData(data: page, regexPattern: regexPattern)

        var myseasons: [Season] = []
        for match in output {
            print(match)
            // Strip the attribute prefix, the optional show-name prefix and the closing `">`.
            let season = match
                .replacingOccurrences(of: "value=\"#parsys_container_banner_", with: "")
                .replacingOccurrences(of: showName + "_", with: "")
                .replacingOccurrences(of: "\">", with: "")
            print(season)
            print(showName)
            myseasons.append(Season(show: self, seasonName: season, title: season))
        }
        myseasons.sort()
        return myseasons
    }
}
|
|
|
|
extension NSTextCheckingResult {
    /// Returns the text of every range of this match, taken from `testedString`.
    ///
    /// - Parameter testedString: the string the match was produced from.
    /// - Returns: one entry per range, index 0 being the whole match. A range
    ///   that cannot be mapped onto `testedString` — such as an optional
    ///   capture group that did not take part in the match — yields `""`, so
    ///   the indexes keep lining up with the group numbers.
    func groups(testedString: String) -> [String] {
        return (0 ..< numberOfRanges).map { index in
            guard let groupRange = Range(range(at: index), in: testedString) else {
                return ""
            }
            return String(testedString[groupRange])
        }
    }
}
|
|
|
|
/// Entry point of the scraper: logs in, requests player tokens and lists the shows.
struct VRTNu: Hashable {

    /// Matches the link of every show on the a-z page.
    let regexPattern = "a href=\"/vrtnu/a-z/(.*).relevant"
    /// Matches the image attribute of every show on the a-z page.
    let imageregexPattern = "data-responsive-image=\".*(jpg|png)"
    /// Matches the link text (title) of every show on the a-z page.
    let titleregexPattern = ".relevant/\">(.*)</a>"

    /// Requests a player token from the media service.
    ///
    /// - Returns: the value of `vrtPlayerToken`, or an empty string when the
    ///   request failed or the response does not contain a token.
    func getToken() -> String {
        ///token = session.post('https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1/tokens', headers={'Content-Type': 'application/json'}, data=b'').json()['vrtPlayerToken']
        let response = just.post("https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1/tokens", headers: ["Content-Type": "application/json"])
        guard let tokendict = response.json as? NSDictionary,
              let token = tokendict.value(forKey: "vrtPlayerToken") as? String else {
            print("could not obtain a vrtPlayerToken")
            return ""
        }
        // The token is a credential and is deliberately not printed.
        return token
    }

    /// Logs in with the given account and registers the login with token.vrt.be.
    ///
    /// - Parameters:
    ///   - username: login id of the account (also sent as the e-mail address).
    ///   - password: password of the account.
    /// - Returns: `false` when the login request failed, answered with status
    ///   code 403 or did not return the UID, signature and timestamp;
    ///   `true` otherwise.
    func login(username: String, password: String) -> Bool {
        let auth_data = [
            "ApiKey": "3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy",
            "targetEnv": "jssdk",
            "loginID": username,
            "password": password,
            "authMode": "cookie",
        ]
        // The response carries account and signature data, so it is not printed.
        guard let auth_info = just.post("https://accounts.eu1.gigya.com/accounts.login", data: auth_data).json as? NSDictionary else {
            print("not authenticated")
            return false
        }
        if let statusCode = auth_info.value(forKey: "statusCode") as? Int, statusCode == 403 {
            print("not authenticated")
            return false
        }
        // Without these three values there is nothing to register with
        // token.vrt.be, so the login cannot have succeeded.
        guard let uid = auth_info.value(forKey: "UID"),
              let uidsig = auth_info.value(forKey: "UIDSignature"),
              let ts = auth_info.value(forKey: "signatureTimestamp") else {
            print("not authenticated")
            return false
        }

        // no token is returned but necessary cookies are set
        let tokenPayload: [String: Any] = [
            "uid": uid,
            "uidsig": uidsig,
            "ts": ts,
            //"email": auth_info.value(forKey: "profile"['email'],
            "email": username,
        ]
        just.post("https://token.vrt.be",
                  json: tokenPayload,
                  headers: [
                      "Content-Type": "application/json",
                      "Referer": "https://www.vrt.be/vrtnu/",
                  ])
        print("authenticated")
        return true
    }

    init() {
        print("init")
    }

    /// Downloads the a-z page and extracts every show listed on it.
    ///
    /// - Returns: the shows sorted by name; an empty array when the page could
    ///   not be downloaded. Entries without a usable image URL are skipped.
    func getShows() -> [Show] {
        print("getting shows")

        guard let data = Just.get("https://www.vrt.be/vrtnu/a-z/").text else {
            print("could not load the show list")
            return []
        }
        print(data)

        let output = parseData(data: data, regexPattern: regexPattern)
        let imageoutput = parseData(data: data, regexPattern: imageregexPattern)
        let titleoutput = parseData(data: data, regexPattern: titleregexPattern)

        // The three lists are matched up by position; only walk as far as all
        // of them have an entry.
        let showCount = min(output.count, imageoutput.count, titleoutput.count)
        if showCount != output.count {
            print("show list is inconsistent, using the first \(showCount) entries")
        }

        var myshows: [Show] = []
        for i in 0 ..< showCount {
            let show = output[i]
                .replacingOccurrences(of: ".relevant", with: "")
                .replacingOccurrences(of: "a href=\"/vrtnu/a-z/", with: "")
            // Strip any scheme and the attribute name, then force https.
            let image = imageoutput[i]
                .replacingOccurrences(of: "https:", with: "")
                .replacingOccurrences(of: "http:", with: "")
                .replacingOccurrences(of: "data-responsive-image=\"", with: "https:")
            let title = titleoutput[i]
                .replacingOccurrences(of: ".relevant/\">", with: "")
                .replacingOccurrences(of: "</a>", with: "")
            guard let imageURL = URL(string: image) else {
                print("skipping " + show + ": invalid image url")
                continue
            }
            myshows.append(Show(vrtNu: self, showName: show, title: title, imageURL: imageURL))
        }
        myshows.sort()
        return myshows
    }
}
|