Simplify NetflixSession.parse_genres_for_video().
[plugin.video.netflix.git] / resources / lib / NetflixSession.py
index b2416c298a6f5a43cad179188db043563c2ecb29..62e58fd0d76fc05ba78667774a420e61b50b7f79 100644
@@ -3,35 +3,37 @@
 # Module: NetflixSession
 # Created on: 13.01.2017
 
-import sys
 import os
-import base64
-import time
-import urllib
 import json
-import requests
-import pickle
-from BeautifulSoup import BeautifulSoup
-from utils import strip_tags
-from utils import noop
+from requests import session, cookies
+from urllib import quote, unquote
+from time import time
+from base64 import urlsafe_b64encode
+from bs4 import BeautifulSoup, SoupStrainer
+from utils import noop, get_user_agent_for_current_platform
+try:
+   import cPickle as pickle
+except ImportError:
+   import pickle
 
 class NetflixSession:
     """Helps with login/session management of Netflix users & API data fetching"""
 
-    base_url = 'https://www.netflix.com/'
+    base_url = 'https://www.netflix.com'
     """str: Secure Netflix url"""
 
     urls = {
         'login': '/login',
-        'browse': '/browse',
-        'video_list_ids': '/warmer',
+        'browse': '/profiles/manage',
+        'video_list_ids': '/preflight',
         'shakti': '/pathEvaluator',
-        'profiles':  '/profiles',
+        'profiles':  '/profiles/manage',
         'switch_profiles': '/profiles/switch',
         'adult_pin': '/pin/service',
         'metadata': '/metadata',
         'set_video_rating': '/setVideoRating',
-        'update_my_list': '/playlistop'
+        'update_my_list': '/playlistop',
+        'kids': '/Kids'
     }
     """:obj:`dict` of :obj:`str` List of all static endpoints for HTML/JSON POST/GET requests"""
 
@@ -59,15 +61,7 @@ class NetflixSession:
         {
             "guid": "72ERT45...",
             "authURL": "145637....",
-            "countryOfSignup": "DE",
-            "emailAddress": "foo@..",
-            "gpsModel": "harris",
-            "isAdultVerified": True,
-            "isInFreeTrial": False,
-            "isKids": False,
-            "isTestAccount": False,
-            "numProfiles": 5,
-            "pinEnabled": True
+            "gpsModel": "harris"
         }
     """
 
@@ -84,9 +78,9 @@ class NetflixSession:
     """
 
     esn = ''
-    """str: Widevine esn, something like: NFCDCH-MC-D7D6F54LOPY8J416T72MQXX3RD20ME"""
+    """str: ESN - something like: NFCDCH-MC-D7D6F54LOPY8J416T72MQXX3RD20ME"""
 
-    def __init__(self, cookie_path, data_path, log_fn=noop):
+    def __init__(self, cookie_path, data_path, verify_ssl=True, log_fn=noop):
         """Stores the cookie path for later use & instanciates a requests
            session with a proper user agent & stored cookies/data if available
 
@@ -103,13 +97,14 @@ class NetflixSession:
         """
         self.cookie_path = cookie_path
         self.data_path = data_path
+        self.verify_ssl = verify_ssl
         self.log = log_fn
 
-        # start session, fake chrome (so that we get a proper widevine esn) & enable gzip
-        self.session = requests.session()
+        # start session, fake chrome on the current platform (so that we get a proper widevine esn) & enable gzip
+        self.session = session()
         self.session.headers.update({
-            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
-            'Accept-Encoding': 'gzip, deflate'
+            'User-Agent': get_user_agent_for_current_platform(),
+            'Accept-Encoding': 'gzip'
         })
 
     def parse_login_form_fields (self, form_soup):
@@ -128,7 +123,7 @@ class NetflixSession:
                 value from the form field
         """
         login_input_fields = {}
-        login_inputs = form_soup.findAll('input')
+        login_inputs = form_soup.find_all('input')
         # gather all form fields, set an empty string as the default value
         for item in login_inputs:
             keys = dict(item.attrs).keys()
@@ -141,216 +136,27 @@ class NetflixSession:
     def extract_inline_netflix_page_data (self, page_soup):
         """Extracts all <script/> tags from the given document and parses the contents of each one of `em.
         The contents of the parsable tags looks something like this:
-
-            <script>window.netflix = window.netflix || {} ;
-                    netflix.notification = {"constants":{"sessionLength":30,"ownerToken":"ZDD...};</script>
-
-        So we´re extracting every JavaScript object contained in the `netflix.x = {};` variable,
-        strip all html tags, unescape the whole thing & finally parse the resulting serialized JSON from this
-        operations. Errors are expected, as not all <script/> tags contained in the page follow these pattern,
-        but the ones we need do, so we´re just catching any errors and applying a noop() function in case this happens,
-        as we´re not interested in those.
-
-        Note: Yes this is ugly & I´d like to avoid doing this, but Netflix leaves us no other choice,
-            as there are simply no api endpoints for the data, we need to extract them from HTML,
-            or better, JavaScript as we´re parsing the contents of <script/> tags
+            <script>window.netflix = window.netflix || {} ; netflix.notification = {"constants":{"sessionLength":30,"ownerToken":"ZDD...};</script>
+        We use a JS parser to generate an AST of the given code & then parse that AST into a python dict.
+        This should be fine, as we´re only interested in a few static values & discard the rest
 
         Parameters
         ----------
         page_soup : :obj:`BeautifulSoup`
             Instance of an BeautifulSoup document or node containing the complete page contents
-
         Returns
         -------
             :obj:`list` of :obj:`dict`
                 List of all the serialized data pulled out of the page's <script/> tags
         """
-        inline_data = [];
-        data_scripts = page_soup.findAll('script', attrs={'src': None});
-        for script in data_scripts:
-            # ugly part: try to parse the data & don't care about errors (as they will be some)
-            try:
-                # find the first occurance of the 'netflix.' string, assigning the contents to a global js var
-                str_index = str(script).find('netflix.')
-                # filter out the contents between the 'netflix.x =' & ';<script>'
-                stripped_data = str(script)[str_index:][(str(script)[str_index:].find('= ') + 2):].replace(';</script>', '').strip()
-                # unescape the contents as they contain characters a JSON parser chokes up upon
-                unescaped_data = stripped_data.decode('string_escape')
-                # strip all the HTML tags within the strings a JSON parser chokes up upon them
-                transformed_data = strip_tags(unescaped_data)
-                # parse the contents with a regular JSON parser, as they should be in a shape that ot actually works
-                try:
-                    parsed_data = json.loads(transformed_data)
-                    inline_data.append(parsed_data)
-                except ValueError, e:
-                    noop()
-            except TypeError, e:
-                noop()
-
-        return inline_data;
-
-    def _parse_user_data (self, netflix_page_data):
-        """Parse out the user data from the big chunk of dicts we got from
-           parsing the JSON-ish data from the netflix homepage
-
-        Parameters
-        ----------
-        netflix_page_data : :obj:`list`
-            List of all the JSON-ish data that has been extracted from the Netflix homepage
-            see: extract_inline_netflix_page_data
-
-        Returns
-        -------
-            :obj:`dict` of :obj:`str`
-
-            {
-                "guid": "72ERT45...",
-                "authURL": "145637....",
-                "countryOfSignup": "DE",
-                "emailAddress": "foo@..",
-                "gpsModel": "harris",
-                "isAdultVerified": True,
-                "isInFreeTrial": False,
-                "isKids": False,
-                "isTestAccount": False,
-                "numProfiles": 5,
-                "pinEnabled": True
-            }
-        """
-        user_data = {};
-        important_fields = [
-            'authURL',
-            'countryOfSignup',
-            'emailAddress',
-            'gpsModel',
-            'guid',
-            'isAdultVerified',
-            'isInFreeTrial',
-            'isKids',
-            'isTestAccount',
-            'numProfiles',
-            'pinEnabled'
-        ]
-        for item in netflix_page_data:
-            if 'models' in dict(item).keys():
-                for important_field in important_fields:
-                    user_data.update({important_field: item['models']['userInfo']['data'][important_field]})
-        return user_data
-
-    def _parse_profile_data (self, netflix_page_data):
-        """Parse out the profile data from the big chunk of dicts we got from
-           parsing the JSON-ish data from the netflix homepage
-
-        Parameters
-        ----------
-        netflix_page_data : :obj:`list`
-            List of all the JSON-ish data that has been extracted from the Netflix homepage
-            see: extract_inline_netflix_page_data
-
-        Returns
-        -------
-            :obj:`dict` of :obj:`dict
-
-            {
-                "72ERT45...": {
-                    "profileName": "username",
-                    "avatar": "http://..../avatar.png",
-                    "id": "72ERT45...",
-                    "isAccountOwner": False,
-                    "isActive": True,
-                    "isFirstUse": False
-                }
-            }
-        """
-        profiles = {};
-        important_fields = [
-            'profileName',
-            'isActive',
-            'isFirstUse',
-            'isAccountOwner'
-        ]
-        # TODO: get rid of this christmas tree of doom
-        for item in netflix_page_data:
-            if 'profiles' in dict(item).keys():
-                for profile_id in item['profiles']:
-                    if self._is_size_key(key=profile_id) == False:
-                        profile = {'id': profile_id}
-                        for important_field in important_fields:
-                            profile.update({important_field: item['profiles'][profile_id]['summary'][important_field]})
-                        profile.update({'avatar': item['avatars']['nf'][item['profiles'][profile_id]['summary']['avatarName']]['images']['byWidth']['320']['value']})
-                        profiles.update({profile_id: profile})
-
-        return profiles
-
-    def _parse_api_base_data (self, netflix_page_data):
-        """Parse out the api url data from the big chunk of dicts we got from
-           parsing the JSOn-ish data from the netflix homepage
-
-        Parameters
-        ----------
-        netflix_page_data : :obj:`list`
-            List of all the JSON-ish data that has been extracted from the Netflix homepage
-            see: extract_inline_netflix_page_data
-
-        Returns
-        -------
-            :obj:`dict` of :obj:`str
-
-            {
-                "API_BASE_URL": "/shakti",
-                "API_ROOT": "https://www.netflix.com/api",
-                "BUILD_IDENTIFIER": "113b89c9", "
-                ICHNAEA_ROOT": "/ichnaea"
-            }
-        """
-        api_data = {};
-        important_fields = [
-            'API_BASE_URL',
-            'API_ROOT',
-            'BUILD_IDENTIFIER',
-            'ICHNAEA_ROOT'
-        ]
-        for item in netflix_page_data:
-            if 'models' in dict(item).keys():
-                for important_field in important_fields:
-                    api_data.update({important_field: item['models']['serverDefs']['data'][important_field]})
-        return api_data
-
-    def _parse_esn_data (self, netflix_page_data):
-        """Parse out the esn id data from the big chunk of dicts we got from
-           parsing the JSOn-ish data from the netflix homepage
-
-        Parameters
-        ----------
-        netflix_page_data : :obj:`list`
-            List of all the JSON-ish data that has been extracted from the Netflix homepage
-            see: extract_inline_netflix_page_data
-
-        Returns
-        -------
-            :obj:`str` of :obj:`str
-            Widevine esn, something like: NFCDCH-MC-D7D6F54LOPY8J416T72MQXX3RD20ME
-        """
-        esn = '';
-        for item in netflix_page_data:
-            if 'models' in dict(item).keys():
-                esn = item['models']['esnGeneratorModel']['data']['esn']
-        return esn
-
-    def _parse_page_contents (self, page_soup):
-        """Call all the parsers we need to extract all the session relevant data from the HTML page
-           Directly assigns it to the NetflixSession instance
-
-        Parameters
-        ----------
-        page_soup : :obj:`BeautifulSoup`
-            Instance of an BeautifulSoup document or node containing the complete page contents
-        """
-        netflix_page_data = self.extract_inline_netflix_page_data(page_soup=page_soup)
-        self.user_data = self._parse_user_data(netflix_page_data=netflix_page_data)
-        self.esn = self._parse_esn_data(netflix_page_data=netflix_page_data)
-        self.api_data = self._parse_api_base_data(netflix_page_data=netflix_page_data)
-        self.profiles = self._parse_profile_data(netflix_page_data=netflix_page_data)
+        scripts = page_soup.find_all('script', attrs={'src': None})
+        self.log(msg='Trying sloppy inline data parser')
+        inline_data = self._sloppy_parse_inline_data(scripts=scripts)
+        if self._verify_auth_and_profiles_data(data=inline_data) != False:
+            self.log(msg='Sloppy inline data parsing successful')
+            return inline_data
+        self.log(msg='Sloppy inline parser failed, trying JS parser')
+        return self._accurate_parse_inline_data(scripts=scripts)
 
     def is_logged_in (self, account):
         """Determines if a user is already logged in (with a valid cookie),
@@ -374,12 +180,12 @@ class NetflixSession:
             return False
         if self._load_data(filename=self.data_path + '_' + account_hash) == False:
             # load the profiles page (to verify the user)
-            response = self.session.get(self._get_document_url_for(component='profiles'))
+            response = self._session_get(component='profiles')
 
             # parse out the needed inline information
-            page_soup = BeautifulSoup(response.text)
-            page_data = self.extract_inline_netflix_page_data(page_soup=page_soup)
-            self._parse_page_contents(page_soup=page_soup)
+            only_script_tags = SoupStrainer('script')
+            page_soup = BeautifulSoup(response.text, 'html.parser', parse_only=only_script_tags)
+            page_data = self._parse_page_contents(page_soup=page_soup)
 
             # check if the cookie is still valid
             for item in page_data:
@@ -405,7 +211,7 @@ class NetflixSession:
         """Try to log in a user with its credentials & stores the cookies if the action is successfull
 
            Note: It fetches the HTML of the login page to extract the fields of the login form,
-           again, this is dirty, but as the fields & their values coudl change at any time, this
+           again, this is dirty, but as the fields & their values could change at any time, this
            should be the most reliable way of retrieving the information
 
         Parameters
@@ -418,12 +224,12 @@ class NetflixSession:
         bool
             User could be logged in or not
         """
-        response = self.session.get(self._get_document_url_for(component='login'))
+        response = self._session_get(component='login')
         if response.status_code != 200:
             return False;
 
         # collect all the login fields & their contents and add the user credentials
-        page_soup = BeautifulSoup(response.text)
+        page_soup = BeautifulSoup(response.text, 'html.parser')
         login_form = page_soup.find(attrs={'class' : 'ui-label-text'}).findPrevious('form')
         login_payload = self.parse_login_form_fields(form_soup=login_form)
         if 'email' in login_payload:
@@ -433,8 +239,8 @@ class NetflixSession:
         login_payload['password'] = account['password']
 
         # perform the login
-        login_response = self.session.post(self._get_document_url_for(component='login'), data=login_payload)
-        login_soup = BeautifulSoup(login_response.text)
+        login_response = self._session_post(component='login', data=login_payload)
+        login_soup = BeautifulSoup(login_response.text, 'html.parser')
 
         # we know that the login was successful if we find an HTML element with the class of 'profile-name'
         if login_soup.find(attrs={'class' : 'profile-name'}) or login_soup.find(attrs={'class' : 'profile-icon'}):
@@ -467,21 +273,17 @@ class NetflixSession:
         """
         payload = {
             'switchProfileGuid': profile_id,
-            '_': int(time.time()),
+            '_': int(time()),
             'authURL': self.user_data['authURL']
         }
 
-        response = self.session.get(self._get_api_url_for(component='switch_profiles'), params=payload);
+        response = self._session_get(component='switch_profiles', type='api', params=payload)
         if response.status_code != 200:
             return False
 
-        # fetch the index page again, so that we can fetch the corresponding user data
-        browse_response = self.session.get(self._get_document_url_for(component='browse'))
-        browse_soup = BeautifulSoup(browse_response.text)
-        self._parse_page_contents(page_soup=browse_soup)
         account_hash = self._generate_account_hash(account=account)
-        self._save_data(filename=self.data_path + '_' + account_hash)
-        return True
+        self.user_data['guid'] = profile_id;
+        return self._save_data(filename=self.data_path + '_' + account_hash)
 
     def send_adult_pin (self, pin):
         """Send the adult pin to Netflix in case an adult rated video requests it
@@ -505,9 +307,8 @@ class NetflixSession:
             'pin': pin,
             'authURL': self.user_data['authURL']
         }
-        url = self._get_api_url_for(component='adult_pin')
-        response = self.session.get(url, params=payload);
-        pin_response = self._process_response(response=response, component=url)
+        response = self._session_get(component='adult_pin', params=payload)
+        pin_response = self._process_response(response=response, component=self._get_api_url_for(component='adult_pin'))
         keys = pin_response.keys()
         if 'success' in keys:
             return True
@@ -585,7 +386,7 @@ class NetflixSession:
             'authURL': self.user_data['authURL']
         })
 
-        response = self.session.post(self._get_api_url_for(component='set_video_rating'), params=params, headers=headers, data=payload)
+        response = self._session_post(component='set_video_rating', type='api', params=params, headers=headers, data=payload)
         return response.status_code == 200
 
     def parse_video_list_ids (self, response_data):
@@ -645,17 +446,22 @@ class NetflixSession:
         for key in self.video_list_keys:
             video_list_ids[key] = {}
 
+        # check if the list items are hidden behind a `value` sub key
+        # this is the case when we fetch the lists via POST, not via a GET preflight request
+        if 'value' in response_data.keys():
+            response_data = response_data['value']
+
         # subcategorize the lists by their context
         video_lists = response_data['lists']
         for video_list_id in video_lists.keys():
             video_list = video_lists[video_list_id]
-            if video_list['context'] == 'genre':
-                video_list_ids['genres'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list))
-            elif video_list['context'] == 'similars' or video_list['context'] == 'becauseYouAdded':
-                video_list_ids['recommendations'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list))
-            else:
-                video_list_ids['user'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list))
-
+            if video_list.get('context', False) != False:
+                if video_list['context'] == 'genre':
+                    video_list_ids['genres'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list))
+                elif video_list['context'] == 'similars' or video_list['context'] == 'becauseYouAdded':
+                    video_list_ids['recommendations'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list))
+                else:
+                    video_list_ids['user'].update(self.parse_video_list_ids_entry(id=video_list_id, entry=video_list))
         return video_list_ids
 
     def parse_video_list_ids_entry (self, id, entry):
@@ -968,7 +774,7 @@ class NetflixSession:
                 'synopsis': video['synopsis'],
                 'regular_synopsis': video['regularSynopsis'],
                 'type': video['summary']['type'],
-                'rating': video['userRating']['average'],
+                'rating': video['userRating'].get('average', 0) if video['userRating'].get('average', None) != None else video['userRating'].get('predicted', 0),
                 'episode_count': season_info['episode_count'],
                 'seasons_label': season_info['seasons_label'],
                 'seasons_count': season_info['seasons_count'],
@@ -1089,12 +895,14 @@ class NetflixSession:
             List of genres
         """
         video_genres = []
-        for genre_key in dict(genres).keys():
-            if self._is_size_key(key=genre_key) == False and genre_key != 'summary':
-                for show_genre_key in dict(video['genres']).keys():
-                    if self._is_size_key(key=show_genre_key) == False and show_genre_key != 'summary':
-                        if video['genres'][show_genre_key][1] == genre_key:
-                            video_genres.append(genres[genre_key]['name'])
+
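+        # Illustrative walk-through (ids are made up): video['genres'] holds
+        # references like {'0': ['genres', '83'], '$size': 2, 'summary': {...}}
+        # & genres holds {'83': {'name': 'TV Shows'}, ...}; the loop below
+        # resolves each reference to its genre name, e.g. ['TV Shows']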
+        for video_genre_key, video_genre in video['genres'].iteritems():
+            if self._is_size_key(video_genre_key) == False and video_genre_key != 'summary':
+                name = genres.get(video_genre[1], {}).get('name')
+
+                if name:
+                    video_genres.append(name)
+
         return video_genres
 
     def parse_tags_for_video (self, video):
@@ -1155,11 +963,11 @@ class NetflixSession:
         :obj:`str`
             Quality of the video
         """
-        quality = '540'
+        quality = '720'
         if video['videoQuality']['hasHD']:
-            quality = '720'
-        if video['videoQuality']['hasUltraHD']:
             quality = '1080'
+        if video['videoQuality']['hasUltraHD']:
+            quality = '4000'
         return quality
 
     def parse_runtime_for_video (self, video):
@@ -1268,14 +1076,29 @@ class NetflixSession:
                 }
             }
         """
-        seasons = {}
         raw_seasons = response_data['value']
+        videos = raw_seasons['videos']
+
+        # get art video key
+        video = {}
+        for key, video_candidate in videos.iteritems():
+            if not self._is_size_key(key):
+                video = video_candidate
+
+        # get season index
+        sorting = {}
+        for idx, season_list_entry in video['seasonList'].iteritems():
+            if self._is_size_key(key=idx) == False and idx != 'summary':
+                sorting[int(season_list_entry[1])] = int(idx)
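+        # e.g. (ids are made up): a seasonList like
+        #   {'0': ['seasons', '70301234'], '1': ['seasons', '70301235'], ...}
+        # yields sorting = {70301234: 0, 70301235: 1}, which
+        # _parse_season_entry uses to keep the seasons in their original order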
+
+        seasons = {}
+
         for season in raw_seasons['seasons']:
             if self._is_size_key(key=season) == False:
-                seasons.update(self.parse_season_entry(season=raw_seasons['seasons'][season], videos=raw_seasons['videos']))
+                seasons.update(self._parse_season_entry(season=raw_seasons['seasons'][season], video=video, sorting=sorting))
         return seasons
 
-    def parse_season_entry (self, season, videos):
+    def _parse_season_entry (self, season, video, sorting):
         """Parse a season list entry e.g. rip out the parts we need
 
         Parameters
@@ -1301,21 +1124,17 @@ class NetflixSession:
                 }
             }
         """
-        # get art video key
-        video_key = ''
-        for key in videos.keys():
-            if self._is_size_key(key=key) == False:
-                video_key = key
         return {
             season['summary']['id']: {
+                'idx': sorting[season['summary']['id']],
                 'id': season['summary']['id'],
                 'text': season['summary']['name'],
                 'shortName': season['summary']['shortName'],
                 'boxarts': {
-                    'small': videos[video_key]['boxarts']['_342x192']['jpg']['url'],
-                    'big': videos[video_key]['boxarts']['_1280x720']['jpg']['url']
+                    'small': video['boxarts']['_342x192']['jpg']['url'],
+                    'big': video['boxarts']['_1280x720']['jpg']['url']
                 },
-                'interesting_moment': videos[video_key]['interestingMoment']['_665x375']['jpg']['url'],
+                'interesting_moment': video['interestingMoment']['_665x375']['jpg']['url'],
             }
         }
 
@@ -1428,6 +1247,11 @@ class NetflixSession:
           },
         }
         """
+        mpaa = ''
+        if episode.get('maturity', None) is not None:
+            if episode['maturity'].get('board', None) is not None and episode['maturity'].get('value', None) is not None:
+                mpaa = str(episode['maturity'].get('board', '').encode('utf-8')) + '-' + str(episode['maturity'].get('value', '').encode('utf-8'))
+
         return {
             episode['summary']['id']: {
                 'id': episode['summary']['id'],
@@ -1438,10 +1262,10 @@ class NetflixSession:
                 'title': episode['info']['title'],
                 'year': episode['info']['releaseYear'],
                 'genres': self.parse_genres_for_video(video=episode, genres=genres),
-                'mpaa': str(episode['maturity']['rating']['board']) + ' ' + str(episode['maturity']['rating']['value']),
+                'mpaa': mpaa,
                 'maturity': episode['maturity'],
                 'playcount': (0, 1)[episode['watched']],
-                'rating': episode['userRating']['average'],
+                'rating': episode['userRating'].get('average', 0) if episode['userRating'].get('average', None) != None else episode['userRating'].get('predicted', 0),
                 'thumb': episode['info']['interestingMoments']['url'],
                 'fanart': episode['interestingMoment']['_1280x720']['jpg']['url'],
                 'poster': episode['boxarts']['_1280x720']['jpg']['url'],
@@ -1460,11 +1284,12 @@ class NetflixSession:
         :obj:`BeautifulSoup`
             Instance of an BeautifulSoup document containing the complete page contents
         """
-        response = self.session.get(self._get_document_url_for(component='browse'))
-        return BeautifulSoup(response.text)
+        response = self._session_get(component='browse')
+        return BeautifulSoup(response.text, 'html.parser')
 
-    def fetch_video_list_ids (self, list_from=0, list_to=50):
+    def fetch_video_list_ids_via_preflight (self, list_from=0, list_to=50):
         """Fetches the JSON with detailed information based on the lists on the landing page (browse page) of Netflix
+           via the preflight (GET) request
 
         Parameters
         ----------
@@ -1484,14 +1309,37 @@ class NetflixSession:
             'toRow': list_to,
             'opaqueImageExtension': 'jpg',
             'transparentImageExtension': 'png',
-            '_': int(time.time()),
+            '_': int(time()),
             'authURL': self.user_data['authURL']
         }
-        url = self._get_api_url_for(component='video_list_ids')
-        response = self.session.get(url, params=payload);
-        return self._process_response(response=response, component=url)
 
-    def fetch_search_results (self, search_str, list_from=0, list_to=48):
+        response = self._session_get(component='video_list_ids', params=payload, type='api')
+        return self._process_response(response=response, component=self._get_api_url_for(component='video_list_ids'))
+
+    def fetch_video_list_ids (self, list_from=0, list_to=50):
+        """Fetches the JSON with detailed information based on the lists on the landing page (browse page) of Netflix
+
+        Parameters
+        ----------
+        list_from : :obj:`int`
+            Start entry for pagination
+
+        list_to : :obj:`int`
+            Last entry for pagination
+
+        Returns
+        -------
+        :obj:`dict` of :obj:`dict` of :obj:`str`
+            Raw Netflix API call response or api call error
+        """
+        paths = [
+            ['lolomo', {'from': list_from, 'to': list_to}, ['displayName', 'context', 'id', 'index', 'length']]
+        ]
+
+        response = self._path_request(paths=paths)
+        return self._process_response(response=response, component='Video list ids')
+
+    def fetch_search_results (self, search_str, list_from=0, list_to=10):
         """Fetches the JSON which contains the results for the given search query
 
         Parameters
@@ -1511,17 +1359,20 @@ class NetflixSession:
             Raw Netflix API call response or api call error
         """
         # properly encode the search string
-        encoded_search_string = urllib.quote(search_str)
+        encoded_search_string = quote(search_str)
 
         paths = [
             ['search', encoded_search_string, 'titles', {'from': list_from, 'to': list_to}, ['summary', 'title']],
             ['search', encoded_search_string, 'titles', {'from': list_from, 'to': list_to}, 'boxarts', '_342x192', 'jpg'],
-            ['search', encoded_search_string, 'titles', ['id', 'length', 'name', 'trackIds', 'requestId']]
+            ['search', encoded_search_string, 'titles', ['id', 'length', 'name', 'trackIds', 'requestId']],
+            ['search', encoded_search_string, 'suggestions', 0, 'relatedvideos', {'from': list_from, 'to': list_to}, ['summary', 'title']],
+            ['search', encoded_search_string, 'suggestions', 0, 'relatedvideos', {'from': list_from, 'to': list_to}, 'boxarts', '_342x192', 'jpg'],
+            ['search', encoded_search_string, 'suggestions', 0, 'relatedvideos', ['id', 'length', 'name', 'trackIds', 'requestId']]
         ]
         response = self._path_request(paths=paths)
         return self._process_response(response=response, component='Search results')
 
-    def fetch_video_list (self, list_id, list_from=0, list_to=20):
+    def fetch_video_list (self, list_id, list_from=0, list_to=26):
         """Fetches the JSON which contains the contents of a given video list
 
         Parameters
@@ -1611,11 +1462,10 @@ class NetflixSession:
         payload = {
             'movieid': id,
             'imageformat': 'jpg',
-            '_': int(time.time())
+            '_': int(time())
         }
-        url = self._get_api_url_for(component='metadata')
-        response = self.session.get(url, params=payload);
-        return self._process_response(response=response, component=url)
+        response = self._session_get(component='metadata', params=payload, type='api')
+        return self._process_response(response=response, component=self._get_api_url_for(component='metadata'))
 
     def fetch_show_information (self, id, type):
         """Fetches the JSON which contains the detailed contents of a show
@@ -1724,12 +1574,11 @@ class NetflixSession:
             Dict containing an email, country & a password property
         """
         # load the profiles page (to verify the user)
-        response = self.session.get(self._get_document_url_for(component='profiles'))
-
+        response = self._session_get(component='profiles')
         # parse out the needed inline information
-        page_soup = BeautifulSoup(response.text)
-        page_data = self.extract_inline_netflix_page_data(page_soup=page_soup)
-        self._parse_page_contents(page_soup)
+        only_script_tags = SoupStrainer('script')
+        page_soup = BeautifulSoup(response.text, 'html.parser', parse_only=only_script_tags)
+        page_data = self._parse_page_contents(page_soup=page_soup)
         account_hash = self._generate_account_hash(account=account)
         self._save_data(filename=self.data_path + '_' + account_hash)
 
@@ -1757,12 +1606,10 @@ class NetflixSession:
         })
 
         params = {
-            'withSize': True,
-            'materialize': True,
             'model': self.user_data['gpsModel']
         }
 
-        return self.session.post(self._get_api_url_for(component='shakti'), params=params, headers=headers, data=data)
+        return self._session_post(component='shakti', type='api', params=params, headers=headers, data=data)
 
     def _is_size_key (self, key):
         """Tiny helper that checks if a given key is called $size or size, as we need to check this often
@@ -1792,7 +1639,10 @@ class NetflixSession:
         :obj:`str`
             API Url
         """
-        return self.api_data['API_ROOT'] + self.api_data['API_BASE_URL'] + '/' + self.api_data['BUILD_IDENTIFIER'] + self.urls[component]
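+        # Illustrative (build id is made up): if API_ROOT already contains
+        # API_BASE_URL, e.g. root 'https://www.netflix.com/api/shakti' & base
+        # '/shakti', appending the base again would double it, so only the
+        # build identifier is added:
+        #   https://www.netflix.com/api/shakti/113b89c9/pathEvaluator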
+        if self.api_data['API_ROOT'].find(self.api_data['API_BASE_URL']) > -1:
+            return self.api_data['API_ROOT'] + '/' + self.api_data['BUILD_IDENTIFIER'] + self.urls[component]
+        else:
+            return self.api_data['API_ROOT'] + self.api_data['API_BASE_URL'] + '/' + self.api_data['BUILD_IDENTIFIER'] + self.urls[component]
 
     def _get_document_url_for (self, component):
         """Tiny helper that builds the url for a requested document endpoint component
@@ -1842,6 +1692,34 @@ class NetflixSession:
         # return the parsed response & everything´s fine
         return response.json()
 
+    def _to_unicode(self, str):
+        '''Attempt to decode a non-UTF-8 string into unicode, using a limited set of encodings
+
+        Parameters
+        ----------
+        str : `str`
+            String to decode
+
+        Returns
+        -------
+        `unicode`
+            Decoded string
+        '''
+        # fuller list of encodings at http://docs.python.org/library/codecs.html#standard-encodings
+        if not str:  return u''
+        u = None
+        # we could add more encodings here, as warranted.
+        encodings = ('ascii', 'utf8', 'latin1')
+        for enc in encodings:
+            if u:  break
+            try:
+                u = unicode(str,enc)
+            except UnicodeDecodeError:
+                pass
+        if not u:
+            u = unicode(str, errors='replace')
+        return u
+
     def _update_my_list (self, video_id, operation):
         """Tiny helper to add & remove items from "my list"
 
@@ -1869,7 +1747,7 @@ class NetflixSession:
             'authURL': self.user_data['authURL']
         })
 
-        response = self.session.post(self._get_api_url_for(component='update_my_list'), headers=headers, data=payload)
+        response = self._session_post(component='update_my_list', type='api', headers=headers, data=payload)
         return response.status_code == 200
 
     def _save_data(self, filename):
@@ -1971,10 +1849,10 @@ class NetflixSession:
             return False
 
         with open(filename) as f:
-            cookies = pickle.load(f)
-            if cookies:
-                jar = requests.cookies.RequestsCookieJar()
-                jar._cookies = cookies
+            _cookies = pickle.load(f)
+            if _cookies:
+                jar = cookies.RequestsCookieJar()
+                jar._cookies = _cookies
                 self.session.cookies = jar
             else:
                 return False
@@ -2007,4 +1885,453 @@ class NetflixSession:
         :obj:`str`
             Account data hash
         """
-        return base64.urlsafe_b64encode(account['email'])
+        return urlsafe_b64encode(account['email'])
+
+    def _session_post (self, component, type='document', data={}, headers={}, params={}):
+        """Executes a get request using requests for the current session & measures the duration of that request
+
+        Parameters
+        ----------
+        component : :obj:`str`
+            Component to query
+
+        type : :obj:`str`
+            Is it a document or API request ('document' is default)
+
+        data : :obj:`dict` of :obj:`str`
+            Payload body as dict
+
+        headers : :obj:`dict` of :obj:`str`
+            Additional headers as dict
+
+        params : :obj:`dict` of :obj:`str`
+            Request params
+
+        Returns
+        -------
+            :obj:`requests.Response`
+                Response of the POST request
+        """
+        url = self._get_document_url_for(component=component) if type == 'document' else self._get_api_url_for(component=component)
+        start = time()
+        response = self.session.post(url=url, data=data, params=params, headers=headers, verify=self.verify_ssl)
+        end = time()
+        self.log(msg='[POST] Request for "' + url + '" took ' + str(end - start) + ' seconds')
+        return response
+
+    def _session_get (self, component, type='document', params={}):
+        """Executes a get request using requests for the current session & measures the duration of that request
+
+        Parameters
+        ----------
+        component : :obj:`str`
+            Component to query
+
+        type : :obj:`str`
+            Is it a document or API request ('document' is default)
+
+        params : :obj:`dict` of :obj:`str`
+            Request params
+
+        Returns
+        -------
+            :obj:`requests.Response`
+                Response of the GET request
+        """
+        url = self._get_document_url_for(component=component) if type == 'document' else self._get_api_url_for(component=component)
+        start = time()
+        response = self.session.get(url=url, verify=self.verify_ssl, params=params)
+        end = time()
+        self.log(msg='[GET] Request for "' + url + '" took ' + str(end - start) + ' seconds')
+        return response
+
+    def _sloppy_parse_user_and_api_data (self, key, contents):
+        """Try to find the user & API data from the inline js by using a string parser
+
+        Parameters
+        ----------
+        key : :obj:`str`
+            Key to match in the inline js
+
+        contents : :obj:`str`
+            Inline JS contents
+
+        Returns
+        -------
+            :obj:`str`
+                Contents of the field to match
+        """
+        key_start = contents.find(key + '"')
+        if int(key_start) == -1:
+            return None
+        sub_contents = contents[int(key_start):]
+        l = sub_contents.find('",')
+        return contents[(int(key_start)+len(key)+3):int(key_start)+l].decode('string_escape')
+
+    def _sloppy_parse_profiles (self, contents):
+        """Try to find the profile data from the inline js by using a string parser & parse/convert the result to JSON
+
+        Parameters
+        ----------
+        contents : :obj:`str`
+            Inline JS contents
+
+        Returns
+        -------
+            :obj:`dict` of :obj:`str` or None
+                Profile data
+        """
+        profile_start = contents.find('profiles":')
+        profile_list_start = contents.find('profilesList')
+        if int(profile_start) > -1 and int(profile_list_start) > -1:
+            try:
+                return json.loads('{"a":{"' + contents[profile_start:profile_list_start-2].decode('string_escape') + '}}').get('a').get('profiles')
+            except (TypeError, ValueError):
+                return None
+        return None
+
+    def _sloppy_parse_avatars (self, contents):
+        """Try to find the avatar data from the inline js by using a string parser & parse/convert the result to JSON
+
+        Parameters
+        ----------
+        contents : :obj:`str`
+            Inline JS contents
+
+        Returns
+        -------
+            :obj:`dict` of :obj:`str` or None
+                Avatar data
+        """
+        avatars_start = contents.find('"nf":')
+        avatars_list_start = contents.find('"profiles"')
+        if int(avatars_start) > -1 and int(avatars_list_start) > -1:
+            try:
+                return json.loads('{' + contents[avatars_start:avatars_list_start-2].decode('string_escape') + '}')
+            except (TypeError, ValueError):
+                return None
+        return None
+
+    def _verify_auth_and_profiles_data (self, data):
+        """Checks that the authURL has a plausible length (between 10 & 50 chars) & that the profiles dict exists
+        Simple validity check for the sloppy data parser
+
+        Parameters
+        ----------
+        data : :obj:`dict` of :obj:`str`
+            Parsed JS contents
+
+        Returns
+        -------
+            bool
+                Data is valid
+        """
+        if type(data.get('profiles')) == dict:
+            if len(str(data.get('authURL', ''))) > 10 and len(str(data.get('authURL', ''))) < 50:
+                return True
+        return False
+
+    def _sloppy_parse_inline_data (self, scripts):
+        """Strips out all the needed user, api & profile data from the inline JS by string parsing
+        Might fail, so if this doesn't succeed, a proper JS parser will chime in
+
+        Note: This has been added for performance reasons only
+
+        Parameters
+        ----------
+        scripts : :obj:`list` of :obj:`BeautifulSoup`
+            Script tags & contents from the Netflix browse page
+
+        Returns
+        -------
+            :obj:`dict` of :obj:`str`
+                Dict containing user, api & profile data
+        """
+        inline_data = {};
+        for script in scripts:
+            contents = str(script.contents[0])
+            important_data = ['authURL', 'API_BASE_URL', 'API_ROOT', 'BUILD_IDENTIFIER', 'ICHNAEA_ROOT', 'gpsModel', 'guid', 'esn']
+            res = {}
+            for key in important_data:
+                _res = self._sloppy_parse_user_and_api_data(key, contents)
+                if _res != None:
+                    res.update({key: _res})
+            if res != {}:
+                inline_data.update(res)
+
+            # parse profiles
+            profiles = self._sloppy_parse_profiles(contents)
+            avatars = self._sloppy_parse_avatars(contents)
+            if profiles != None:
+                inline_data.update({'profiles': profiles})
+            if avatars != None:
+                inline_data.update(avatars)
+        return inline_data
+
+    def _accurate_parse_inline_data (self, scripts):
+        """Uses a proper JS parser to fetch all the api, iser & profile data from within the inline JS
+
+        Note: This is slow but accurate
+
+        Parameters
+        ----------
+        scripts : :obj:`list` of :obj:`BeautifulSoup`
+            Script tags & contents from the Netflix browse page
+
+        Returns
+        -------
+            :obj:`dict` of :obj:`str`
+                Dict containing user, api & profile data
+        """
+        inline_data = []
+        from pyjsparser import PyJsParser
+        parser = PyJsParser()
+        for script in scripts:
+            data = {}
+            # unicode escape that incoming script stuff
+            contents = self._to_unicode(str(script.contents[0]))
+            # parse the JS & load the declarations we´re interested in
+            parsed = parser.parse(contents)
+            if len(parsed['body']) > 1 and parsed['body'][1]['expression']['right'].get('properties', None) != None:
+                declarations = parsed['body'][1]['expression']['right']['properties']
+                for declaration in declarations:
+                    for key in declaration:
+                        # we found the correct path if the declaration is a dict & of type 'ObjectExpression'
+                        if type(declaration[key]) is dict:
+                            if declaration[key]['type'] == 'ObjectExpression':
+                                # add all static data recursively
+                                for expression in declaration[key]['properties']:
+                                    data[expression['key']['value']] = self._parse_rec(expression['value'])
+                    inline_data.append(data)
+        return inline_data
+
+    def _parse_rec (self, node):
+        """Iterates over a JavaScript AST and return values found
+
+        Parameters
+        ----------
+        node : :obj:`dict`
+            JS AST Expression
+        Returns
+        -------
+        :obj:`dict` of :obj:`dict` or :obj:`str`
+            Parsed contents of the node
+        """
+        if node['type'] == 'ObjectExpression':
+            _ret = {}
+            for prop in node['properties']:
+                _ret.update({prop['key']['value']: self._parse_rec(prop['value'])})
+            return _ret
+        if node['type'] == 'Literal':
+            return node['value']
+
+    def _parse_user_data (self, netflix_page_data):
+        """Parse out the user data from the big chunk of dicts we got from
+           parsing the JSON-ish data from the netflix homepage
+
+        Parameters
+        ----------
+        netflix_page_data : :obj:`list`
+            List of all the JSON-ish data that has been extracted from the Netflix homepage
+            see: extract_inline_netflix_page_data
+
+        Returns
+        -------
+            :obj:`dict` of :obj:`str`
+
+            {
+                "guid": "72ERT45...",
+                "authURL": "145637....",
+                "gpsModel": "harris"
+            }
+        """
+        user_data = {};
+        important_fields = [
+            'authURL',
+            'gpsModel',
+            'guid'
+        ]
+
+        # values are accessible via dict (sloppy parsing successful)
+        if type(netflix_page_data) == dict:
+            for important_field in important_fields:
+                user_data.update({important_field: netflix_page_data.get(important_field, '')})
+            return user_data
+
+        # values are stored in lists (returned from JS parser)
+        for item in netflix_page_data:
+            if 'memberContext' in dict(item).keys():
+                for important_field in important_fields:
+                    user_data.update({important_field: item['memberContext']['data']['userInfo'][important_field]})
+
+        return user_data
+
+    def _parse_profile_data (self, netflix_page_data):
+        """Parse out the profile data from the big chunk of dicts we got from
+           parsing the JSON-ish data from the netflix homepage
+
+        Parameters
+        ----------
+        netflix_page_data : :obj:`list`
+            List of all the JSON-ish data that has been extracted from the Netflix homepage
+            see: extract_inline_netflix_page_data
+
+        Returns
+        -------
+            :obj:`dict` of :obj:`dict`
+
+            {
+                "72ERT45...": {
+                    "profileName": "username",
+                    "avatar": "http://..../avatar.png",
+                    "id": "72ERT45...",
+                    "isAccountOwner": False,
+                    "isActive": True,
+                    "isFirstUse": False
+                }
+            }
+        """
+        profiles = {};
+        important_fields = [
+            'profileName',
+            'isActive',
+            'isAccountOwner',
+            'isKids'
+        ]
+        # values are accessible via dict (sloppy parsing successful)
+        if type(netflix_page_data) == dict:
+            for profile_id in netflix_page_data.get('profiles'):
+                if self._is_size_key(key=profile_id) == False and type(netflix_page_data['profiles'][profile_id]) == dict and netflix_page_data['profiles'][profile_id].get('avatar', False) != False:
+                    profile = {'id': profile_id}
+                    for important_field in important_fields:
+                        profile.update({important_field: netflix_page_data['profiles'][profile_id]['summary'][important_field]})
+                    avatar_base = netflix_page_data['nf'].get(netflix_page_data['profiles'][profile_id]['summary']['avatarName'], False);
+                    avatar = 'https://secure.netflix.com/ffe/profiles/avatars_v2/320x320/PICON_029.png' if avatar_base == False else avatar_base['images']['byWidth']['320']['value']
+                    profile.update({'avatar': avatar, 'isFirstUse': False})
+                    profiles.update({profile_id: profile})
+            return profiles
+
+        # values are stored in lists (returned from JS parser)
+        # TODO: get rid of this christmas tree of doom
+        for item in netflix_page_data:
+            if 'hasViewedRatingWelcomeModal' in dict(item).keys():
+                for profile_id in item:
+                    if self._is_size_key(key=profile_id) == False and type(item[profile_id]) == dict and item[profile_id].get('avatar', False) != False:
+                        profile = {'id': profile_id}
+                        for important_field in important_fields:
+                            profile.update({important_field: item[profile_id]['summary'][important_field]})
+                        avatar_base = item['nf'].get(item[profile_id]['summary']['avatarName'], False);
+                        avatar = 'https://secure.netflix.com/ffe/profiles/avatars_v2/320x320/PICON_029.png' if avatar_base == False else avatar_base['images']['byWidth']['320']['value']
+                        profile.update({'avatar': avatar})
+                        profiles.update({profile_id: profile})
+        return profiles
+
+    def _parse_api_base_data (self, netflix_page_data):
+        """Parse out the api url data from the big chunk of dicts we got from
+           parsing the JSON-ish data from the netflix homepage
+
+        Parameters
+        ----------
+        netflix_page_data : :obj:`list`
+            List of all the JSON-ish data that has been extracted from the Netflix homepage
+            see: extract_inline_netflix_page_data
+
+        Returns
+        -------
+            :obj:`dict` of :obj:`str`
+
+            {
+                "API_BASE_URL": "/shakti",
+                "API_ROOT": "https://www.netflix.com/api",
+                "BUILD_IDENTIFIER": "113b89c9",
+                "ICHNAEA_ROOT": "/ichnaea"
+            }
+        """
+        api_data = {};
+        important_fields = [
+            'API_BASE_URL',
+            'API_ROOT',
+            'BUILD_IDENTIFIER',
+            'ICHNAEA_ROOT'
+        ]
+
+        # values are accessible via dict (sloppy parsing successful)
+        if type(netflix_page_data) == dict:
+            for important_field in important_fields:
+                api_data.update({important_field: netflix_page_data.get(important_field, '')})
+            return api_data
+
+        for item in netflix_page_data:
+            if 'serverDefs' in dict(item).keys():
+                for important_field in important_fields:
+                    api_data.update({important_field: item['serverDefs']['data'][important_field]})
+        return api_data
+
+    def _parse_esn_data (self, netflix_page_data):
+        """Parse out the esn id data from the big chunk of dicts we got from
+           parsing the JSON-ish data from the netflix homepage
+
+        Parameters
+        ----------
+        netflix_page_data : :obj:`list`
+            List of all the JSON-ish data that has been extracted from the Netflix homepage
+            see: extract_inline_netflix_page_data
+
+        Returns
+        -------
+            :obj:`str`
+            ESN, something like: NFCDCH-MC-D7D6F54LOPY8J416T72MQXX3RD20ME
+        """
+        # we generate an esn from device strings for android
+        import subprocess
+        try:
+            manufacturer = subprocess.check_output(["/system/bin/getprop", "ro.product.manufacturer"])
+            if manufacturer:
+                esn = 'NFANDROID1-PRV-'
+                input = subprocess.check_output(["/system/bin/getprop", "ro.nrdp.modelgroup"])
+                if not input:
+                    esn = esn + 'T-L3-'
+                else:
+                    esn = esn + input.strip(' \t\n\r') + '-'
+                esn = esn + '{:5}'.format(manufacturer.strip(' \t\n\r').upper())
+                input = subprocess.check_output(["/system/bin/getprop" ,"ro.product.model"])
+                esn = esn + input.strip(' \t\n\r').replace(' ', '=').upper()
+                self.log(msg='Android generated ESN:' + esn)
+                return esn
+        except OSError as e:
+            self.log(msg='Ignoring exception for non Android devices')
+
+        # values are accessible via dict (sloppy parsing successful)
+        if type(netflix_page_data) == dict:
+            return netflix_page_data.get('esn', '')
+
+        esn = ''
+
+        # values are stored in lists (returned from JS parser)
+        for item in netflix_page_data:
+            if 'esnGeneratorModel' in dict(item).keys():
+                esn = item['esnGeneratorModel']['data']['esn']
+        return esn
+
+    def _parse_page_contents (self, page_soup):
+        """Call all the parsers we need to extract all the session relevant data from the HTML page
+           Directly assigns it to the NetflixSession instance
+
+        Parameters
+        ----------
+        page_soup : :obj:`BeautifulSoup`
+            Instance of an BeautifulSoup document or node containing the complete page contents
+        """
+        netflix_page_data = self.extract_inline_netflix_page_data(page_soup=page_soup)
+        self.user_data = self._parse_user_data(netflix_page_data=netflix_page_data)
+        self.esn = self._parse_esn_data(netflix_page_data=netflix_page_data)
+        self.api_data = self._parse_api_base_data(netflix_page_data=netflix_page_data)
+        self.profiles = self._parse_profile_data(netflix_page_data=netflix_page_data)
+        self.log(msg='Found ESN "' + self.esn + '"')
+        return netflix_page_data