import cPickle as pickle
except:
import pickle
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, SoupStrainer
from pyjsparser import PyJsParser
from utils import noop
if 'memberContext' in dict(item).keys():
for important_field in important_fields:
user_data.update({important_field: item['memberContext']['data']['userInfo'][important_field]})
- print '.............'
- print user_data
- print '.............'
return user_data
def _parse_profile_data (self, netflix_page_data):
for item in netflix_page_data:
if 'hasViewedRatingWelcomeModal' in dict(item).keys():
for profile_id in item:
- print '------------'
- print profile_id
- print '------------'
if self._is_size_key(key=profile_id) == False and type(item[profile_id]) == dict and item[profile_id].get('avatar', False) != False:
profile = {'id': profile_id}
for important_field in important_fields:
self.esn = self._parse_esn_data(netflix_page_data=netflix_page_data)
self.api_data = self._parse_api_base_data(netflix_page_data=netflix_page_data)
self.profiles = self._parse_profile_data(netflix_page_data=netflix_page_data)
- if self.user_data.get('bauthURL', False) == False:
- print '...............'
- print page_soup.text.find('authURL');
- print '...............'
-
+ return netflix_page_data
def is_logged_in (self, account):
"""Determines if a user is already logged in (with a valid cookie),
response = self.session.get(self._get_document_url_for(component='profiles'), verify=self.verify_ssl)
# parse out the needed inline information
- page_soup = BeautifulSoup(response.text)
- page_data = self.extract_inline_netflix_page_data(page_soup=page_soup)
- self._parse_page_contents(page_soup=page_soup)
+ only_script_tags = SoupStrainer('script')
+ page_soup = BeautifulSoup(response.text, 'html.parser', parse_only=only_script_tags)
+ page_data = self._parse_page_contents(page_soup=page_soup)
# check if the cookie is still valid
for item in page_data:
# perform the login
login_response = self.session.post(self._get_document_url_for(component='login'), data=login_payload, verify=self.verify_ssl)
- login_soup = BeautifulSoup(login_response.text)
+ login_soup = BeautifulSoup(login_response.text, 'html.parser')
# we know that the login was successful if we find an HTML element with the class of 'profile-name'
if login_soup.find(attrs={'class' : 'profile-name'}) or login_soup.find(attrs={'class' : 'profile-icon'}):
# fetch the index page again, so that we can fetch the corresponding user data
browse_response = self.session.get(self._get_document_url_for(component='browse'), verify=self.verify_ssl)
- browse_soup = BeautifulSoup(browse_response.text)
+ only_script_tags = SoupStrainer('script')
+ browse_soup = BeautifulSoup(browse_response.text, 'html.parser', parse_only=only_script_tags)
self._parse_page_contents(page_soup=browse_soup)
account_hash = self._generate_account_hash(account=account)
self._save_data(filename=self.data_path + '_' + account_hash)
:obj:`str`
Quality of the video
"""
- quality = '540'
+ quality = '720'
if video['videoQuality']['hasHD']:
- quality = '720'
- if video['videoQuality']['hasUltraHD']:
quality = '1080'
+ if video['videoQuality']['hasUltraHD']:
+ quality = '4000'
return quality
def parse_runtime_for_video (self, video):
for key in videos.keys():
if self._is_size_key(key=key) == False:
video_key = key
+ # get season index
+ sorting = {}
+ for idx in videos[video_key]['seasonList']:
+ if self._is_size_key(key=idx) == False and idx != 'summary':
+ sorting[int(videos[video_key]['seasonList'][idx][1])] = int(idx)
return {
season['summary']['id']: {
+ 'idx': sorting[season['summary']['id']],
'id': season['summary']['id'],
'text': season['summary']['name'],
'shortName': season['summary']['shortName'],
Instance of a BeautifulSoup document containing the complete page contents
"""
response = self.session.get(self._get_document_url_for(component='browse'), verify=self.verify_ssl)
- return BeautifulSoup(response.text)
+ return BeautifulSoup(response.text, 'html.parser')
def fetch_video_list_ids (self, list_from=0, list_to=50):
"""Fetches the JSON with detailed information based on the lists on the landing page (browse page) of Netflix
# load the profiles page (to verify the user)
response = self.session.get(self._get_document_url_for(component='profiles'), verify=self.verify_ssl)
# parse out the needed inline information
- page_soup = BeautifulSoup(response.text)
- page_data = self.extract_inline_netflix_page_data(page_soup=page_soup)
- self._parse_page_contents(page_soup)
+ only_script_tags = SoupStrainer('script')
+ page_soup = BeautifulSoup(response.text, 'html.parser', parse_only=only_script_tags)
+ page_data = self._parse_page_contents(page_soup=page_soup)
account_hash = self._generate_account_hash(account=account)
self._save_data(filename=self.data_path + '_' + account_hash)