X-Git-Url: http://git.code-monkey.de/?p=plugin.video.netflix.git;a=blobdiff_plain;f=resources%2Flib%2FNetflixSession.py;h=62e58fd0d76fc05ba78667774a420e61b50b7f79;hp=d1169117c2536c3e2e53623ff6ee731a87fc7395;hb=00100bf1d63fcc4895f81aefbe8d79784f908330;hpb=1d8369ef0de8ea43ac783282d50f4844f486d01d diff --git a/resources/lib/NetflixSession.py b/resources/lib/NetflixSession.py index d116911..62e58fd 100644 --- a/resources/lib/NetflixSession.py +++ b/resources/lib/NetflixSession.py @@ -3,20 +3,18 @@ # Module: NetflixSession # Created on: 13.01.2017 -import sys import os -import base64 -import time -import urllib import json -import requests +from requests import session, cookies +from urllib import quote, unquote +from time import time +from base64 import urlsafe_b64encode +from bs4 import BeautifulSoup, SoupStrainer +from utils import noop, get_user_agent_for_current_platform try: import cPickle as pickle except: import pickle -from bs4 import BeautifulSoup -from utils import strip_tags -from utils import noop class NetflixSession: """Helps with login/session management of Netflix users & API data fetching""" @@ -26,15 +24,16 @@ class NetflixSession: urls = { 'login': '/login', - 'browse': '/browse', - 'video_list_ids': '/warmer', + 'browse': '/profiles/manage', + 'video_list_ids': '/preflight', 'shakti': '/pathEvaluator', - 'profiles': '/profiles', + 'profiles': '/profiles/manage', 'switch_profiles': '/profiles/switch', 'adult_pin': '/pin/service', 'metadata': '/metadata', 'set_video_rating': '/setVideoRating', - 'update_my_list': '/playlistop' + 'update_my_list': '/playlistop', + 'kids': '/Kids' } """:obj:`dict` of :obj:`str` List of all static endpoints for HTML/JSON POST/GET requests""" @@ -62,15 +61,7 @@ class NetflixSession: { "guid": "72ERT45...", "authURL": "145637....", - "countryOfSignup": "DE", - "emailAddress": "foo@..", - "gpsModel": "harris", - "isAdultVerified": True, - "isInFreeTrial": False, - "isKids": False, - "isTestAccount": False, - "numProfiles": 
5, - "pinEnabled": True + "gpsModel": "harris" } """ @@ -87,7 +78,7 @@ class NetflixSession: """ esn = '' - """str: Widevine esn, something like: NFCDCH-MC-D7D6F54LOPY8J416T72MQXX3RD20ME""" + """str: ESN - something like: NFCDCH-MC-D7D6F54LOPY8J416T72MQXX3RD20ME""" def __init__(self, cookie_path, data_path, verify_ssl=True, log_fn=noop): """Stores the cookie path for later use & instanciates a requests @@ -109,11 +100,11 @@ class NetflixSession: self.verify_ssl = verify_ssl self.log = log_fn - # start session, fake chrome (so that we get a proper widevine esn) & enable gzip - self.session = requests.session() + # start session, fake chrome on the current platform (so that we get a proper widevine esn) & enable gzip + self.session = session() self.session.headers.update({ - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36', - 'Accept-Encoding': 'gzip, deflate' + 'User-Agent': get_user_agent_for_current_platform(), + 'Accept-Encoding': 'gzip' }) def parse_login_form_fields (self, form_soup): @@ -145,218 +136,27 @@ class NetflixSession: def extract_inline_netflix_page_data (self, page_soup): """Extracts all - - So we´re extracting every JavaScript object contained in the `netflix.x = {};` variable, - strip all html tags, unescape the whole thing & finally parse the resulting serialized JSON from this - operations. Errors are expected, as not all + We use a JS parser to generate an AST of the code given & then parse that AST into a python dict. 
+ This should be okay, as we´re only interested in a few static values & put the rest aside Parameters ---------- page_soup : :obj:`BeautifulSoup` Instance of an BeautifulSoup document or node containing the complete page contents - Returns ------- :obj:`list` of :obj:`dict` List of all the serialized data pulled out of the pagws ', '').strip() - # unescape the contents as they contain characters a JSON parser chokes up upon - unescaped_data = stripped_data.decode('string_escape') - # strip all the HTML tags within the strings a JSON parser chokes up upon them - transformed_data = strip_tags(unescaped_data) - # parse the contents with a regular JSON parser, as they should be in a shape that ot actually works - try: - parsed_data = json.loads(transformed_data) - inline_data.append(parsed_data) - except ValueError, e: - noop() - except TypeError, e: - noop() - - return inline_data; - - def _parse_user_data (self, netflix_page_data): - """Parse out the user data from the big chunk of dicts we got from - parsing the JSON-ish data from the netflix homepage - - Parameters - ---------- - netflix_page_data : :obj:`list` - List of all the JSON-ish data that has been extracted from the Netflix homepage - see: extract_inline_netflix_page_data - - Returns - ------- - :obj:`dict` of :obj:`str` - - { - "guid": "72ERT45...", - "authURL": "145637....", - "countryOfSignup": "DE", - "emailAddress": "foo@..", - "gpsModel": "harris", - "isAdultVerified": True, - "isInFreeTrial": False, - "isKids": False, - "isTestAccount": False, - "numProfiles": 5, - "pinEnabled": True - } - """ - user_data = {}; - important_fields = [ - 'authURL', - 'countryOfSignup', - 'emailAddress', - 'gpsModel', - 'guid', - 'isAdultVerified', - 'isInFreeTrial', - 'isKids', - 'isTestAccount', - 'numProfiles', - 'pinEnabled' - ] - for item in netflix_page_data: - if 'models' in dict(item).keys(): - for important_field in important_fields: - user_data.update({important_field: 
item['models']['userInfo']['data'][important_field]}) - return user_data - - def _parse_profile_data (self, netflix_page_data): - """Parse out the profile data from the big chunk of dicts we got from - parsing the JSON-ish data from the netflix homepage - - Parameters - ---------- - netflix_page_data : :obj:`list` - List of all the JSON-ish data that has been extracted from the Netflix homepage - see: extract_inline_netflix_page_data - - Returns - ------- - :obj:`dict` of :obj:`dict - - { - "72ERT45...": { - "profileName": "username", - "avatar": "http://..../avatar.png", - "id": "72ERT45...", - "isAccountOwner": False, - "isActive": True, - "isFirstUse": False - } - } - """ - profiles = {}; - important_fields = [ - 'profileName', - 'isActive', - 'isFirstUse', - 'isAccountOwner' - ] - # TODO: get rid of this christmas tree of doom - for item in netflix_page_data: - if 'profiles' in dict(item).keys(): - for profile_id in item['profiles']: - if self._is_size_key(key=profile_id) == False: - profile = {'id': profile_id} - for important_field in important_fields: - profile.update({important_field: item['profiles'][profile_id]['summary'][important_field]}) - avatar_base = item['avatars']['nf'].get(item['profiles'][profile_id]['summary']['avatarName'], False); - avatar = 'https://secure.netflix.com/ffe/profiles/avatars_v2/320x320/PICON_029.png' if avatar_base == False else avatar_base['images']['byWidth']['320']['value'] - profile.update({'avatar': avatar}) - profiles.update({profile_id: profile}) - - return profiles - - def _parse_api_base_data (self, netflix_page_data): - """Parse out the api url data from the big chunk of dicts we got from - parsing the JSOn-ish data from the netflix homepage - - Parameters - ---------- - netflix_page_data : :obj:`list` - List of all the JSON-ish data that has been extracted from the Netflix homepage - see: extract_inline_netflix_page_data - - Returns - ------- - :obj:`dict` of :obj:`str - - { - "API_BASE_URL": "/shakti", - "API_ROOT": 
"https://www.netflix.com/api", - "BUILD_IDENTIFIER": "113b89c9", " - ICHNAEA_ROOT": "/ichnaea" - } - """ - api_data = {}; - important_fields = [ - 'API_BASE_URL', - 'API_ROOT', - 'BUILD_IDENTIFIER', - 'ICHNAEA_ROOT' - ] - for item in netflix_page_data: - if 'models' in dict(item).keys(): - for important_field in important_fields: - api_data.update({important_field: item['models']['serverDefs']['data'][important_field]}) - return api_data - - def _parse_esn_data (self, netflix_page_data): - """Parse out the esn id data from the big chunk of dicts we got from - parsing the JSOn-ish data from the netflix homepage - - Parameters - ---------- - netflix_page_data : :obj:`list` - List of all the JSON-ish data that has been extracted from the Netflix homepage - see: extract_inline_netflix_page_data - - Returns - ------- - :obj:`str` of :obj:`str - Widevine esn, something like: NFCDCH-MC-D7D6F54LOPY8J416T72MQXX3RD20ME - """ - esn = ''; - for item in netflix_page_data: - if 'models' in dict(item).keys(): - esn = item['models']['esnGeneratorModel']['data']['esn'] - return esn - - def _parse_page_contents (self, page_soup): - """Call all the parsers we need to extract all the session relevant data from the HTML page - Directly assigns it to the NetflixSession instance - - Parameters - ---------- - page_soup : :obj:`BeautifulSoup` - Instance of an BeautifulSoup document or node containing the complete page contents - """ - netflix_page_data = self.extract_inline_netflix_page_data(page_soup=page_soup) - self.user_data = self._parse_user_data(netflix_page_data=netflix_page_data) - self.esn = self._parse_esn_data(netflix_page_data=netflix_page_data) - self.api_data = self._parse_api_base_data(netflix_page_data=netflix_page_data) - self.profiles = self._parse_profile_data(netflix_page_data=netflix_page_data) + scripts = page_soup.find_all('script', attrs={'src': None}) + self.log(msg='Trying sloppy inline data parser') + inline_data = self._sloppy_parse_inline_data(scripts=scripts) 
+ if self._verfify_auth_and_profiles_data(data=inline_data) != False: + self.log(msg='Sloppy inline data parsing successfull') + return inline_data + self.log(msg='Sloppy inline parser failed, trying JS parser') + return self._accurate_parse_inline_data(scripts=scripts) def is_logged_in (self, account): """Determines if a user is already logged in (with a valid cookie), @@ -380,12 +180,12 @@ class NetflixSession: return False if self._load_data(filename=self.data_path + '_' + account_hash) == False: # load the profiles page (to verify the user) - response = self.session.get(self._get_document_url_for(component='profiles'), verify=self.verify_ssl) + response = self._session_get(component='profiles') # parse out the needed inline information - page_soup = BeautifulSoup(response.text) - page_data = self.extract_inline_netflix_page_data(page_soup=page_soup) - self._parse_page_contents(page_soup=page_soup) + only_script_tags = SoupStrainer('script') + page_soup = BeautifulSoup(response.text, 'html.parser', parse_only=only_script_tags) + page_data = self._parse_page_contents(page_soup=page_soup) # check if the cookie is still valid for item in page_data: @@ -411,7 +211,7 @@ class NetflixSession: """Try to log in a user with its credentials & stores the cookies if the action is successfull Note: It fetches the HTML of the login page to extract the fields of the login form, - again, this is dirty, but as the fields & their values coudl change at any time, this + again, this is dirty, but as the fields & their values could change at any time, this should be the most reliable way of retrieving the information Parameters @@ -424,12 +224,12 @@ class NetflixSession: bool User could be logged in or not """ - response = self.session.get(self._get_document_url_for(component='login'), verify=self.verify_ssl) + response = self._session_get(component='login') if response.status_code != 200: return False; # collect all the login fields & their contents and add the user credentials - 
page_soup = BeautifulSoup(response.text) + page_soup = BeautifulSoup(response.text, 'html.parser') login_form = page_soup.find(attrs={'class' : 'ui-label-text'}).findPrevious('form') login_payload = self.parse_login_form_fields(form_soup=login_form) if 'email' in login_payload: @@ -439,8 +239,8 @@ class NetflixSession: login_payload['password'] = account['password'] # perform the login - login_response = self.session.post(self._get_document_url_for(component='login'), data=login_payload, verify=self.verify_ssl) - login_soup = BeautifulSoup(login_response.text) + login_response = self._session_post(component='login', data=login_payload) + login_soup = BeautifulSoup(login_response.text, 'html.parser') # we know that the login was successfull if we find an HTML element with the class of 'profile-name' if login_soup.find(attrs={'class' : 'profile-name'}) or login_soup.find(attrs={'class' : 'profile-icon'}): @@ -473,21 +273,17 @@ class NetflixSession: """ payload = { 'switchProfileGuid': profile_id, - '_': int(time.time()), + '_': int(time()), 'authURL': self.user_data['authURL'] } - response = self.session.get(self._get_api_url_for(component='switch_profiles'), params=payload, verify=self.verify_ssl); + response = self._session_get(component='switch_profiles', type='api', params=payload) if response.status_code != 200: return False - # fetch the index page again, so that we can fetch the corresponding user data - browse_response = self.session.get(self._get_document_url_for(component='browse'), verify=self.verify_ssl) - browse_soup = BeautifulSoup(browse_response.text) - self._parse_page_contents(page_soup=browse_soup) account_hash = self._generate_account_hash(account=account) - self._save_data(filename=self.data_path + '_' + account_hash) - return True + self.user_data['guid'] = profile_id; + return self._save_data(filename=self.data_path + '_' + account_hash) def send_adult_pin (self, pin): """Send the adult pin to Netflix in case an adult rated video requests it @@ 
-511,9 +307,8 @@ class NetflixSession: 'pin': pin, 'authURL': self.user_data['authURL'] } - url = self._get_api_url_for(component='adult_pin') - response = self.session.get(url, params=payload, verify=self.verify_ssl); - pin_response = self._process_response(response=response, component=url) + response = self._session_get(component='adult_pin', params=payload) + pin_response = self._process_response(response=response, component=self._get_api_url_for(component='adult_pin')) keys = pin_response.keys() if 'success' in keys: return True @@ -591,7 +386,7 @@ class NetflixSession: 'authURL': self.user_data['authURL'] }) - response = self.session.post(self._get_api_url_for(component='set_video_rating'), params=params, headers=headers, data=payload, verify=self.verify_ssl) + response = self._session_post(component='set_video_rating', type='api', params=params, headers=headers, data=payload) return response.status_code == 200 def parse_video_list_ids (self, response_data): @@ -651,17 +446,22 @@ class NetflixSession: for key in self.video_list_keys: video_list_ids[key] = {} + # check if the list items are hidden behind a `value` sub key + # this is the case when we fetch the lists via POST, not via a GET preflight request + if 'value' in response_data.keys(): + response_data = response_data['value'] + # subcatogorize the lists by their context video_lists = response_data['lists'] for video_list_id in video_lists.keys(): video_list = video_lists[video_list_id] - if video_list['context'] == 'genre': - video_list_ids['genres'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list)) - elif video_list['context'] == 'similars' or video_list['context'] == 'becauseYouAdded': - video_list_ids['recommendations'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list)) - else: - video_list_ids['user'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list)) - + if video_list.get('context', False) != False: + if 
video_list['context'] == 'genre': + video_list_ids['genres'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list)) + elif video_list['context'] == 'similars' or video_list['context'] == 'becauseYouAdded': + video_list_ids['recommendations'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list)) + else: + video_list_ids['user'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list)) return video_list_ids def parse_video_list_ids_entry (self, id, entry): @@ -974,7 +774,7 @@ class NetflixSession: 'synopsis': video['synopsis'], 'regular_synopsis': video['regularSynopsis'], 'type': video['summary']['type'], - 'rating': video['userRating']['average'], + 'rating': video['userRating'].get('average', 0) if video['userRating'].get('average', None) != None else video['userRating'].get('predicted', 0), 'episode_count': season_info['episode_count'], 'seasons_label': season_info['seasons_label'], 'seasons_count': season_info['seasons_count'], @@ -1095,12 +895,14 @@ class NetflixSession: List of genres """ video_genres = [] - for genre_key in dict(genres).keys(): - if self._is_size_key(key=genre_key) == False and genre_key != 'summary': - for show_genre_key in dict(video['genres']).keys(): - if self._is_size_key(key=show_genre_key) == False and show_genre_key != 'summary': - if video['genres'][show_genre_key][1] == genre_key: - video_genres.append(genres[genre_key]['name']) + + for video_genre_key, video_genre in video['genres'].iteritems(): + if self._is_size_key(video_genre_key) == False and video_genre_key != 'summary': + name = genres.get(video_genre[1], {}).get('name') + + if name: + video_genres.append(name) + return video_genres def parse_tags_for_video (self, video): @@ -1161,11 +963,11 @@ class NetflixSession: :obj:`str` Quality of the video """ - quality = '540' + quality = '720' if video['videoQuality']['hasHD']: - quality = '720' - if video['videoQuality']['hasUltraHD']: quality = '1080' + if 
video['videoQuality']['hasUltraHD']: + quality = '4000' return quality def parse_runtime_for_video (self, video): @@ -1274,14 +1076,29 @@ class NetflixSession: } } """ - seasons = {} raw_seasons = response_data['value'] + videos = raw_seasons['videos'] + + # get art video key + video = {} + for key, video_candidate in videos.iteritems(): + if not self._is_size_key(key): + video = video_candidate + + # get season index + sorting = {} + for idx, season_list_entry in video['seasonList'].iteritems(): + if self._is_size_key(key=idx) == False and idx != 'summary': + sorting[int(season_list_entry[1])] = int(idx) + + seasons = {} + for season in raw_seasons['seasons']: if self._is_size_key(key=season) == False: - seasons.update(self.parse_season_entry(season=raw_seasons['seasons'][season], videos=raw_seasons['videos'])) + seasons.update(self._parse_season_entry(season=raw_seasons['seasons'][season], video=video, sorting=sorting)) return seasons - def parse_season_entry (self, season, videos): + def _parse_season_entry (self, season, video, sorting): """Parse a season list entry e.g. 
rip out the parts we need Parameters @@ -1307,21 +1124,17 @@ class NetflixSession: } } """ - # get art video key - video_key = '' - for key in videos.keys(): - if self._is_size_key(key=key) == False: - video_key = key return { season['summary']['id']: { + 'idx': sorting[season['summary']['id']], 'id': season['summary']['id'], 'text': season['summary']['name'], 'shortName': season['summary']['shortName'], 'boxarts': { - 'small': videos[video_key]['boxarts']['_342x192']['jpg']['url'], - 'big': videos[video_key]['boxarts']['_1280x720']['jpg']['url'] + 'small': video['boxarts']['_342x192']['jpg']['url'], + 'big': video['boxarts']['_1280x720']['jpg']['url'] }, - 'interesting_moment': videos[video_key]['interestingMoment']['_665x375']['jpg']['url'], + 'interesting_moment': video['interestingMoment']['_665x375']['jpg']['url'], } } @@ -1434,6 +1247,11 @@ class NetflixSession: }, } """ + mpaa = '' + if episode.get('maturity', None) is not None: + if episode['maturity'].get('board', None) is not None and episode['maturity'].get('value', None) is not None: + mpaa = str(episode['maturity'].get('board', '').encode('utf-8')) + '-' + str(episode['maturity'].get('value', '').encode('utf-8')) + return { episode['summary']['id']: { 'id': episode['summary']['id'], @@ -1444,10 +1262,10 @@ class NetflixSession: 'title': episode['info']['title'], 'year': episode['info']['releaseYear'], 'genres': self.parse_genres_for_video(video=episode, genres=genres), - 'mpaa': str(episode['maturity']['rating']['board']) + ' ' + str(episode['maturity']['rating']['value']), + 'mpaa': mpaa, 'maturity': episode['maturity'], 'playcount': (0, 1)[episode['watched']], - 'rating': episode['userRating']['average'], + 'rating': episode['userRating'].get('average', 0) if episode['userRating'].get('average', None) != None else episode['userRating'].get('predicted', 0), 'thumb': episode['info']['interestingMoments']['url'], 'fanart': episode['interestingMoment']['_1280x720']['jpg']['url'], 'poster': 
episode['boxarts']['_1280x720']['jpg']['url'], @@ -1466,11 +1284,12 @@ class NetflixSession: :obj:`BeautifulSoup` Instance of an BeautifulSoup document containing the complete page contents """ - response = self.session.get(self._get_document_url_for(component='browse'), verify=self.verify_ssl) - return BeautifulSoup(response.text) + response = self._session_get(component='browse') + return BeautifulSoup(response.text, 'html.parser') - def fetch_video_list_ids (self, list_from=0, list_to=50): + def fetch_video_list_ids_via_preflight (self, list_from=0, list_to=50): """Fetches the JSON with detailed information based on the lists on the landing page (browse page) of Netflix + via the preflight (GET) request Parameters ---------- @@ -1490,12 +1309,35 @@ class NetflixSession: 'toRow': list_to, 'opaqueImageExtension': 'jpg', 'transparentImageExtension': 'png', - '_': int(time.time()), + '_': int(time()), 'authURL': self.user_data['authURL'] } - url = self._get_api_url_for(component='video_list_ids') - response = self.session.get(url, params=payload, verify=self.verify_ssl); - return self._process_response(response=response, component=url) + + response = self._session_get(component='video_list_ids', params=payload, type='api') + return self._process_response(response=response, component=self._get_api_url_for(component='video_list_ids')) + + def fetch_video_list_ids (self, list_from=0, list_to=50): + """Fetches the JSON with detailed information based on the lists on the landing page (browse page) of Netflix + + Parameters + ---------- + list_from : :obj:`int` + Start entry for pagination + + list_to : :obj:`int` + Last entry for pagination + + Returns + ------- + :obj:`dict` of :obj:`dict` of :obj:`str` + Raw Netflix API call response or api call error + """ + paths = [ + ['lolomo', {'from': list_from, 'to': list_to}, ['displayName', 'context', 'id', 'index', 'length']] + ] + + response = self._path_request(paths=paths) + return self._process_response(response=response, 
component='Video list ids') def fetch_search_results (self, search_str, list_from=0, list_to=10): """Fetches the JSON which contains the results for the given search query @@ -1517,7 +1359,7 @@ class NetflixSession: Raw Netflix API call response or api call error """ # properly encode the search string - encoded_search_string = urllib.quote(search_str) + encoded_search_string = quote(search_str) paths = [ ['search', encoded_search_string, 'titles', {'from': list_from, 'to': list_to}, ['summary', 'title']], @@ -1530,7 +1372,7 @@ class NetflixSession: response = self._path_request(paths=paths) return self._process_response(response=response, component='Search results') - def fetch_video_list (self, list_id, list_from=0, list_to=20): + def fetch_video_list (self, list_id, list_from=0, list_to=26): """Fetches the JSON which contains the contents of a given video list Parameters @@ -1620,11 +1462,10 @@ class NetflixSession: payload = { 'movieid': id, 'imageformat': 'jpg', - '_': int(time.time()) + '_': int(time()) } - url = self._get_api_url_for(component='metadata') - response = self.session.get(url, params=payload, verify=self.verify_ssl); - return self._process_response(response=response, component=url) + response = self._session_get(component='metadata', params=payload, type='api') + return self._process_response(response=response, component=self._get_api_url_for(component='metadata')) def fetch_show_information (self, id, type): """Fetches the JSON which contains the detailed contents of a show @@ -1733,11 +1574,11 @@ class NetflixSession: Dict containing an email, country & a password property """ # load the profiles page (to verify the user) - response = self.session.get(self._get_document_url_for(component='profiles'), verify=self.verify_ssl) + response = self._session_get(component='profiles') # parse out the needed inline information - page_soup = BeautifulSoup(response.text) - page_data = self.extract_inline_netflix_page_data(page_soup=page_soup) - 
self._parse_page_contents(page_soup) + only_script_tags = SoupStrainer('script') + page_soup = BeautifulSoup(response.text, 'html.parser', parse_only=only_script_tags) + page_data = self._parse_page_contents(page_soup=page_soup) account_hash = self._generate_account_hash(account=account) self._save_data(filename=self.data_path + '_' + account_hash) @@ -1765,12 +1606,10 @@ class NetflixSession: }) params = { - 'withSize': True, - 'materialize': True, 'model': self.user_data['gpsModel'] } - return self.session.post(self._get_api_url_for(component='shakti'), params=params, headers=headers, data=data, verify=self.verify_ssl) + return self._session_post(component='shakti', type='api', params=params, headers=headers, data=data) def _is_size_key (self, key): """Tiny helper that checks if a given key is called $size or size, as we need to check this often @@ -1800,7 +1639,10 @@ class NetflixSession: :obj:`str` API Url """ - return self.api_data['API_ROOT'] + self.api_data['API_BASE_URL'] + '/' + self.api_data['BUILD_IDENTIFIER'] + self.urls[component] + if self.api_data['API_ROOT'].find(self.api_data['API_BASE_URL']) > -1: + return self.api_data['API_ROOT'] + '/' + self.api_data['BUILD_IDENTIFIER'] + self.urls[component] + else: + return self.api_data['API_ROOT'] + self.api_data['API_BASE_URL'] + '/' + self.api_data['BUILD_IDENTIFIER'] + self.urls[component] def _get_document_url_for (self, component): """Tiny helper that builds the url for a requested document endpoint component @@ -1850,6 +1692,34 @@ class NetflixSession: # return the parsed response & everything´s fine return response.json() + def _to_unicode(self, str): + '''Attempt to fix non uft-8 string into utf-8, using a limited set of encodings + + Parameters + ---------- + str : `str` + String to decode + + Returns + ------- + `str` + Decoded string + ''' + # fuller list of encodings at http://docs.python.org/library/codecs.html#standard-encodings + if not str: return u'' + u = None + # we could add more 
encodings here, as warranted. + encodings = ('ascii', 'utf8', 'latin1') + for enc in encodings: + if u: break + try: + u = unicode(str,enc) + except UnicodeDecodeError: + pass + if not u: + u = unicode(str, errors='replace') + return u + def _update_my_list (self, video_id, operation): """Tiny helper to add & remove items from "my list" @@ -1877,7 +1747,7 @@ class NetflixSession: 'authURL': self.user_data['authURL'] }) - response = self.session.post(self._get_api_url_for(component='update_my_list'), headers=headers, data=payload, verify=self.verify_ssl) + response = self._session_post(component='update_my_list', type='api', headers=headers, data=payload) return response.status_code == 200 def _save_data(self, filename): @@ -1979,10 +1849,10 @@ class NetflixSession: return False with open(filename) as f: - cookies = pickle.load(f) - if cookies: - jar = requests.cookies.RequestsCookieJar() - jar._cookies = cookies + _cookies = pickle.load(f) + if _cookies: + jar = cookies.RequestsCookieJar() + jar._cookies = _cookies self.session.cookies = jar else: return False @@ -2015,4 +1885,453 @@ class NetflixSession: :obj:`str` Account data hash """ - return base64.urlsafe_b64encode(account['email']) + return urlsafe_b64encode(account['email']) + + def _session_post (self, component, type='document', data={}, headers={}, params={}): + """Executes a get request using requests for the current session & measures the duration of that request + + Parameters + ---------- + component : :obj:`str` + Component to query + + type : :obj:`str` + Is it a document or API request ('document' is default) + + data : :obj:`dict` of :obj:`str` + Payload body as dict + + header : :obj:`dict` of :obj:`str` + Additional headers as dict + + params : :obj:`dict` of :obj:`str` + Request params + + Returns + ------- + :obj:`str` + Contents of the field to match + """ + url = self._get_document_url_for(component=component) if type == 'document' else self._get_api_url_for(component=component) + start = 
time() + response = self.session.post(url=url, data=data, params=params, headers=headers, verify=self.verify_ssl) + end = time() + self.log(msg='[POST] Request for "' + url + '" took ' + str(end - start) + ' seconds') + return response + + def _session_get (self, component, type='document', params={}): + """Executes a get request using requests for the current session & measures the duration of that request + + Parameters + ---------- + component : :obj:`str` + Component to query + + type : :obj:`str` + Is it a document or API request ('document' is default) + + params : :obj:`dict` of :obj:`str` + Request params + + Returns + ------- + :obj:`str` + Contents of the field to match + """ + url = self._get_document_url_for(component=component) if type == 'document' else self._get_api_url_for(component=component) + start = time() + response = self.session.get(url=url, verify=self.verify_ssl, params=params) + end = time() + self.log(msg='[GET] Request for "' + url + '" took ' + str(end - start) + ' seconds') + return response + + def _sloppy_parse_user_and_api_data (self, key, contents): + """Try to find the user & API data from the inline js by using a string parser + + Parameters + ---------- + key : :obj:`str` + Key to match in the inline js + + contents : :obj:`str` + Inline JS contents + + Returns + ------- + :obj:`str` + Contents of the field to match + """ + key_start = contents.find(key + '"') + if int(key_start) == -1: + return None + sub_contents = contents[int(key_start):] + l = sub_contents.find('",') + return contents[(int(key_start)+len(key)+3):int(key_start)+l].decode('string_escape') + + def _sloppy_parse_profiles (self, contents): + """Try to find the profile data from the inline js by using a string parser & parse/convert the result to JSON + + Parameters + ---------- + contents : :obj:`str` + Inline JS contents + + Returns + ------- + :obj:`dict` of :obj:`str` or None + Profile data + """ + profile_start = contents.find('profiles":') + 
profile_list_start = contents.find('profilesList') + if int(profile_start) > -1 and int(profile_list_start) > -1: + try: + try: + return json.loads('{"a":{"' + contents[profile_start:profile_list_start-2].decode('string_escape') + '}}').get('a').get('profiles') + except ValueError, e: + return None + except TypeError, e: + return None + return None + + def _sloppy_parse_avatars (self, contents): + """Try to find the avatar data from the inline js by using a string parser & parse/convert the result to JSON + + Parameters + ---------- + contents : :obj:`str` + Inline JS contents + + Returns + ------- + :obj:`dict` of :obj:`str` or None + Avatar data + """ + avatars_start = contents.find('"nf":') + avatars_list_start = contents.find('"profiles"') + if int(avatars_start) > -1 and int(avatars_list_start) > -1: + try: + try: + return json.loads('{' + contents[avatars_start:avatars_list_start-2].decode('string_escape') + '}') + except ValueError, e: + return None + except TypeError, e: + return None + return None + + def _verfify_auth_and_profiles_data (self, data): + """Checks if the authURL has at least a certain length & doesn't overrule a certain length & if the profiles dict exists + Simple validity check for the sloppy data parser + + Parameters + ---------- + data : :obj:`dict` of :obj:`str` + Parsed JS contents + + Returns + ------- + bool + Data is valid + """ + if type(data.get('profiles')) == dict: + if len(str(data.get('authURL', ''))) > 10 and len(str(data.get('authURL', ''))) < 50: + return True + return False + + def _sloppy_parse_inline_data (self, scripts): + """Strips out all the needed user, api & profile data from the inline JS by string parsing + Might fail, so if this doesn't succeed, a proper JS parser will chime in + + Note: This has been added for performance reasons only + + Parameters + ---------- + scripts : :obj:`list` of :obj:`BeautifoulSoup` + Script tags & contents from the Netflix browse page + + Returns + ------- + :obj:`dict` of 
:obj:`str` + Dict containijg user, api & profile data + """ + inline_data = {}; + for script in scripts: + contents = str(script.contents[0]) + important_data = ['authURL', 'API_BASE_URL', 'API_ROOT', 'BUILD_IDENTIFIER', 'ICHNAEA_ROOT', 'gpsModel', 'guid', 'esn'] + res = {} + for key in important_data: + _res = self._sloppy_parse_user_and_api_data(key, contents) + if _res != None: + res.update({key: _res}) + if res != {}: + inline_data.update(res) + + # parse profiles + profiles = self._sloppy_parse_profiles(contents) + avatars = self._sloppy_parse_avatars(contents) + if profiles != None: + inline_data.update({'profiles': profiles}) + if avatars != None: + inline_data.update(avatars) + return inline_data + + def _accurate_parse_inline_data (self, scripts): + """Uses a proper JS parser to fetch all the api, iser & profile data from within the inline JS + + Note: This is slow but accurate + + Parameters + ---------- + scripts : :obj:`list` of :obj:`BeautifoulSoup` + Script tags & contents from the Netflix browse page + + Returns + ------- + :obj:`dict` of :obj:`str` + Dict containing user, api & profile data + """ + inline_data = [] + from pyjsparser import PyJsParser + parser = PyJsParser() + for script in scripts: + data = {} + # unicode escape that incoming script stuff + contents = self._to_unicode(str(script.contents[0])) + # parse the JS & load the declarations we´re interested in + parsed = parser.parse(contents) + if len(parsed['body']) > 1 and parsed['body'][1]['expression']['right'].get('properties', None) != None: + declarations = parsed['body'][1]['expression']['right']['properties'] + for declaration in declarations: + for key in declaration: + # we found the correct path if the declaration is a dict & of type 'ObjectExpression' + if type(declaration[key]) is dict: + if declaration[key]['type'] == 'ObjectExpression': + # add all static data recursivly + for expression in declaration[key]['properties']: + data[expression['key']['value']] = 
def _parse_user_data(self, netflix_page_data):
    """Parse out the user data from the big chunk of dicts we got from
    parsing the JSON-ish data from the netflix homepage

    Parameters
    ----------
    netflix_page_data : :obj:`dict` or :obj:`list` of :obj:`dict`
        Either a flat dict holding the fields directly (sloppy parsing)
        or a list of dicts where the fields are read from
        `memberContext -> data -> userInfo` (JS parser)
        see: extract_inline_netflix_page_data

    Returns
    -------
    :obj:`dict` of :obj:`str`

        {
            "guid": "72ERT45...",
            "authURL": "145637....",
            "gpsModel": "harris"
        }

        Fields missing in the flat dict form are set to an empty string,
        a list without a `memberContext` entry results in an empty dict

    Raises
    ------
    KeyError
        If a `memberContext` entry of the list form lacks `data`,
        `userInfo` or one of the fields
    """
    important_fields = ('authURL', 'gpsModel', 'guid')

    # values are accessible via dict (sloppy parsing successful)
    if isinstance(netflix_page_data, dict):
        return {field: netflix_page_data.get(field, '') for field in important_fields}

    # values are stored in lists (returned from JS parser)
    user_data = {}
    for item in netflix_page_data:
        if 'memberContext' in item:
            user_info = item['memberContext']['data']['userInfo']
            for field in important_fields:
                user_data[field] = user_info[field]
    return user_data
from the Netflix homepage + see: extract_inline_netflix_page_data + + Returns + ------- + :obj:`dict` of :obj:`dict + + { + "72ERT45...": { + "profileName": "username", + "avatar": "http://..../avatar.png", + "id": "72ERT45...", + "isAccountOwner": False, + "isActive": True, + "isFirstUse": False + } + } + """ + profiles = {}; + important_fields = [ + 'profileName', + 'isActive', + 'isAccountOwner', + 'isKids' + ] + # values are accessible via dict (sloppy parsing successfull) + if type(netflix_page_data) == dict: + for profile_id in netflix_page_data.get('profiles'): + if self._is_size_key(key=profile_id) == False and type(netflix_page_data['profiles'][profile_id]) == dict and netflix_page_data['profiles'][profile_id].get('avatar', False) != False: + profile = {'id': profile_id} + for important_field in important_fields: + profile.update({important_field: netflix_page_data['profiles'][profile_id]['summary'][important_field]}) + avatar_base = netflix_page_data['nf'].get(netflix_page_data['profiles'][profile_id]['summary']['avatarName'], False); + avatar = 'https://secure.netflix.com/ffe/profiles/avatars_v2/320x320/PICON_029.png' if avatar_base == False else avatar_base['images']['byWidth']['320']['value'] + profile.update({'avatar': avatar, 'isFirstUse': False}) + profiles.update({profile_id: profile}) + return profiles + + # values are stored in lists (returned from JS parser) + # TODO: get rid of this christmas tree of doom + for item in netflix_page_data: + if 'hasViewedRatingWelcomeModal' in dict(item).keys(): + for profile_id in item: + if self._is_size_key(key=profile_id) == False and type(item[profile_id]) == dict and item[profile_id].get('avatar', False) != False: + profile = {'id': profile_id} + for important_field in important_fields: + profile.update({important_field: item[profile_id]['summary'][important_field]}) + avatar_base = item['nf'].get(item[profile_id]['summary']['avatarName'], False); + avatar = 
def _parse_api_base_data(self, netflix_page_data):
    """Parse out the api url data from the big chunk of dicts we got from
    parsing the JSON-ish data from the netflix homepage

    Parameters
    ----------
    netflix_page_data : :obj:`dict` or :obj:`list` of :obj:`dict`
        Either a flat dict holding the fields directly (sloppy parsing)
        or a list of dicts where the fields are read from
        `serverDefs -> data` (JS parser)
        see: extract_inline_netflix_page_data

    Returns
    -------
    :obj:`dict` of :obj:`str`

        {
            "API_BASE_URL": "/shakti",
            "API_ROOT": "https://www.netflix.com/api",
            "BUILD_IDENTIFIER": "113b89c9",
            "ICHNAEA_ROOT": "/ichnaea"
        }

        Fields missing in the flat dict form are set to an empty string,
        a list without a `serverDefs` entry results in an empty dict

    Raises
    ------
    KeyError
        If a `serverDefs` entry of the list form lacks `data` or one of
        the fields
    """
    important_fields = (
        'API_BASE_URL',
        'API_ROOT',
        'BUILD_IDENTIFIER',
        'ICHNAEA_ROOT',
    )

    # values are accessible via dict (sloppy parsing successful)
    if isinstance(netflix_page_data, dict):
        return {field: netflix_page_data.get(field, '') for field in important_fields}

    # values are stored in lists (returned from JS parser)
    api_data = {}
    for item in netflix_page_data:
        if 'serverDefs' in item:
            server_defs = item['serverDefs']['data']
            for field in important_fields:
                api_data[field] = server_defs[field]
    return api_data
def _parse_page_contents(self, page_soup):
    """Run all the parsers we need to get the session relevant data out of the HTML page
    Stores the results as `user_data`, `esn`, `api_data` & `profiles` on the NetflixSession instance

    Parameters
    ----------
    page_soup : :obj:`BeautifulSoup`
        Instance of a BeautifulSoup document or node containing the complete page contents

    Returns
    -------
    The inline page data as returned by `extract_inline_netflix_page_data`
    """
    page_data = self.extract_inline_netflix_page_data(page_soup=page_soup)
    # (instance attribute, name of the parser that fills it), applied in order
    parser_table = (
        ('user_data', '_parse_user_data'),
        ('esn', '_parse_esn_data'),
        ('api_data', '_parse_api_base_data'),
        ('profiles', '_parse_profile_data'),
    )
    for attribute, parser_name in parser_table:
        parser = getattr(self, parser_name)
        setattr(self, attribute, parser(netflix_page_data=page_data))
    self.log(msg='Found ESN "' + self.esn + '"')
    return page_data