1 ###############################################################################
2 # Universal Analytics for Python
3 # Copyright (c) 2013, Analytics Pros
5 # This project is free software, distributed under the BSD license.
6 # Analytics Pros offers consulting and integration services if your firm needs
7 # assistance in strategy, implementation, or auditing existing work.
8 ###############################################################################
10 from urllib2 import urlopen, build_opener, install_opener
11 from urllib2 import Request, HTTPSHandler
12 from urllib2 import URLError, HTTPError
13 from urllib import urlencode
def generate_uuid(basedata = None):
    """ Provides a _random_ UUID with no input, or a UUID4-format MD5 checksum of any input data provided.

        basedata -- None for a random UUID; otherwise a byte string or a
                    unicode string, which is hashed to a stable identifier.

        Returns the identifier as a 36-character string, or None when
        basedata is neither None nor a string.
    """
    if basedata is None:
        return str(uuid.uuid4())

    # Text must be encoded explicitly: handing unicode to md5() triggers an
    # implicit ASCII encode, which raises for any non-ASCII character.
    if isinstance(basedata, type(u'')):
        basedata = basedata.encode('utf-8')

    if not isinstance(basedata, bytes):
        return None  # unsupported input type

    checksum = hashlib.md5(basedata).hexdigest()
    # Lay the 32 hex digits out in the 8-4-4-4-12 UUID grouping
    return '-'.join((checksum[0:8], checksum[8:12], checksum[12:16], checksum[16:20], checksum[20:32]))
class Time(datetime.datetime):
    """ Wrappers and convenience methods for processing various time representations """

    @classmethod
    def from_unix(cls, seconds, milliseconds = 0):
        """ Produce a full |datetime.datetime| object from a Unix timestamp.

            The fields come from time.gmtime(), so the result is a naive
            object expressed in UTC. A milliseconds value of None is treated
            as 0.
        """
        fields = list(time.gmtime(seconds))[0:6]
        # The microsecond field must be an integer; milliseconds may be None or a float
        fields.append(int((milliseconds or 0) * 1000))
        return cls(*fields)

    @classmethod
    def to_unix(cls, timestamp):
        """ Wrapper over time module to produce Unix epoch time as a float.

            The conversion uses time.mktime(), i.e. the timestamp is read as
            local time. Raises TypeError for anything but a datetime object.
        """
        if not isinstance(timestamp, datetime.datetime):
            raise TypeError('Time.to_unix expects a datetime object')
        return time.mktime(timestamp.timetuple())

    @classmethod
    def milliseconds_offset(cls, timestamp, now = None):
        """ Offset time (in milliseconds) from a |datetime.datetime| object to now.

            timestamp -- a datetime object, or a Unix epoch time (int/float)
            now       -- reference epoch time; defaults to time.time()
        """
        if isinstance(timestamp, (int, float)):
            base = timestamp
        else:
            # Float divisor: integer division would always discard the
            # sub-second part on Python 2.
            base = cls.to_unix(timestamp) + timestamp.microsecond / 1000000.0
        if now is None:
            now = time.time()
        return (now - base) * 1000
class HTTPRequest(object):
    """ URL Construction and request handling abstraction.
        This is not intended to be used outside this module.

        Automates mapping of persistent state (i.e. query parameters)
        onto transient datasets for each query.
    """

    endpoint = 'https://www.google-analytics.com/collect'

    @staticmethod
    def debug():
        """ Activate debugging on urllib2 """
        handler = HTTPSHandler(debuglevel = 1)
        opener = build_opener(handler)
        install_opener(opener)

    # Store properties for all requests
    def __init__(self, user_agent = None, *args, **opts):
        self.user_agent = user_agent or 'Analytics Pros - Universal Analytics (Python)'

    @classmethod
    def fixUTF8(cls, data): # Ensure proper encoding for UA's servers...
        """ Convert all unicode strings to UTF-8 (in place); returns the same dict.

            Byte strings are left untouched: calling .encode() on them forces
            an implicit ASCII decode that fails on any non-ASCII byte.
        """
        text_type = type(u'')  # `unicode` on Python 2
        for key in data:
            if isinstance(data[ key ], text_type):
                data[ key ] = data[ key ].encode('utf-8')
        return data

    # Apply stored properties to the given dataset & GET the configured endpoint
    def send(self, data):
        """ Transmit the dataset as the query string of a GET request """
        request = Request(
                self.endpoint + '?' + urlencode(self.fixUTF8(data)),
                headers = {
                    'User-Agent': self.user_agent
                }
            )
        response = self.open(request)
        if response is not False:
            response.close()  # the body is not used; release the connection now

    def open(self, request):
        """ Open the request; returns the response object, or False on failure """
        try:
            return urlopen(request)
        except HTTPError:
            return False
        except URLError:
            # Network-level failure: hand the request to the (future) retry cache
            self.cache_request(request)
            return False

    def cache_request(self, request):
        # TODO: implement a proper caching mechanism here for re-transmitting hits
        # record = (Time.now(), request.get_full_url(), request.get_data(), request.headers)
        pass
class HTTPPost(HTTPRequest):
    """ Request handler that transmits the dataset as a POST body """

    # Apply stored properties to the given dataset & POST to the configured endpoint
    def send(self, data):
        """ Transmit the dataset as the URL-encoded body of a POST request """
        request = Request(
                self.endpoint,
                data = urlencode(self.fixUTF8(data)),
                headers = {
                    'User-Agent': self.user_agent
                }
            )
        response = self.open(request)
        if response is not False:
            response.close()  # the body is not used; release the connection now
class Tracker(object):
    """ Primary tracking interface for Universal Analytics """

    params = None
    parameter_alias = {}
    valid_hittypes = ('pageview', 'event', 'social', 'screenview', 'transaction', 'item', 'exception', 'timing')

    # (str, unicode) on Python 2, i.e. the same set of types as `basestring`
    _string_types = (bytes, type(u''))

    @classmethod
    def alias(cls, typemap, base, *names):
        """ Declare an alternate (humane) name for a measurement protocol parameter """
        cls.parameter_alias[ base ] = (typemap, base)
        for i in names:
            cls.parameter_alias[ i ] = (typemap, base)

    @classmethod
    def _parameter_name(cls, name):
        """ Resolve an alias or '&'-prefixed raw name to its protocol parameter name; raises KeyError if unknown """
        # Slice instead of name[0] so an empty name is rejected as unknown
        if isinstance(name, cls._string_types) and name[:1] == '&':
            return name[1:]
        if name in cls.parameter_alias:
            return cls.parameter_alias[ name ][1]
        raise KeyError('Parameter "{0}" is not recognized'.format(name))

    @classmethod
    def coerceParameter(cls, name, value = None):
        """ Map (name, value) to (protocol parameter, type-cast value).

            A name starting with '&' is a raw parameter: the prefix is dropped
            and the value is sent as a string. Raises KeyError for names that
            are neither raw nor declared through alias().
        """
        if isinstance(name, cls._string_types) and name[:1] == '&':
            # Strings pass through unchanged: str() on non-ASCII unicode raises
            if isinstance(value, cls._string_types):
                return name[1:], value
            return name[1:], str(value)
        elif name in cls.parameter_alias:
            typecast, param_name = cls.parameter_alias.get(name)
            return param_name, typecast(value)
        else:
            raise KeyError('Parameter "{0}" is not recognized'.format(name))

    def payload(self, data):
        """ Yield (parameter, value) pairs for every recognized entry of data; unknown names are skipped """
        for key, value in data.items():
            try:
                yield self.coerceParameter(key, value)
            except KeyError:
                continue

    # Positional arguments accepted by send() per hit type: (expected type, parameter)
    option_sequence = {
        'pageview': [ (_string_types, 'dp') ],
        'event': [ (_string_types, 'ec'), (_string_types, 'ea'), (_string_types, 'el'), (int, 'ev') ],
        'social': [ (_string_types, 'sn'), (_string_types, 'sa'), (_string_types, 'st') ],
        # 'utt' also accepts numbers; a numeric timing value used to be dropped silently
        'timing': [ (_string_types, 'utc'), (_string_types, 'utv'), (_string_types + (int, float), 'utt'), (_string_types, 'utl') ]
    }

    @classmethod
    def consume_options(cls, data, hittype, args):
        """ Interpret sequential arguments related to known hittypes based on declared structures """
        data[ 't' ] = hittype # integrate hit type parameter
        for position, (expected_type, optname) in enumerate(cls.option_sequence.get(hittype, ())):
            # A missing or wrongly-typed argument leaves its slot unset
            if position < len(args) and isinstance(args[ position ], expected_type):
                data[ optname ] = args[ position ]

    @classmethod
    def hittime(cls, timestamp = None, age = None, milliseconds = None):
        """ Returns an integer representing the milliseconds offset for a given hit (relative to now).

            timestamp    -- Unix epoch seconds (int/float) or a datetime object
            age          -- age of the hit in seconds, used when no timestamp is given
            milliseconds -- optional extra milliseconds for a numeric timestamp or age

            Returns None when neither timestamp nor age is usable.
        """
        if isinstance(timestamp, (int, float)):
            # Epoch values are passed straight through: a gmtime()/mktime()
            # round trip would skew the result by the local UTC offset.
            return int(Time.milliseconds_offset(timestamp + (milliseconds or 0) / 1000.0))
        if isinstance(timestamp, datetime.datetime):
            return int(Time.milliseconds_offset(timestamp))
        if isinstance(age, (int, float)):
            return int(age * 1000) + (milliseconds or 0)
        return None

    @property
    def account(self):
        return self.params.get('tid', None)

    def __init__(self, account, name = None, client_id = None, hash_client_id = False, user_id = None, user_agent = None, use_post = True):

        if use_post is False:
            self.http = HTTPRequest(user_agent = user_agent)
        else:
            self.http = HTTPPost(user_agent = user_agent)

        self.params = { 'v': 1, 'tid': account }

        if client_id is None:
            client_id = generate_uuid()

        self.params[ 'cid' ] = client_id

        self.hash_client_id = hash_client_id

        if user_id is not None:
            self.params[ 'uid' ] = user_id

    def set_timestamp(self, data):
        """ Interpret time-related options, apply queue-time parameter as needed """
        if 'hittime' in data: # an absolute timestamp
            offset = self.hittime(timestamp = data.pop('hittime', None))
            if offset is not None: # unusable input: omit 'qt' rather than store None
                data['qt'] = offset
        if 'hitage' in data: # a relative age (in seconds)
            offset = self.hittime(age = data.pop('hitage', None))
            if offset is not None:
                data['qt'] = offset

    def send(self, hittype, *args, **data):
        """ Transmit HTTP requests to Google Analytics using the measurement protocol """

        if hittype not in self.valid_hittypes:
            raise KeyError('Unsupported Universal Analytics Hit Type: {0}'.format(repr(hittype)))

        self.set_timestamp(data)
        self.consume_options(data, hittype, args)

        for item in args: # process dictionary-object arguments of transient data
            if isinstance(item, dict):
                for key, val in self.payload(item):
                    data[ key ] = val

        for k, v in self.params.items(): # update only absent parameters
            if k not in data:
                data[ k ] = v

        data = dict(self.payload(data))

        if self.hash_client_id:
            data[ 'cid' ] = generate_uuid(data[ 'cid' ])

        # Transmit the hit to Google...
        self.http.send(data)

    # Setting persistent attributes of the session/hit/etc (inc. custom dimensions/metrics)
    def set(self, name, value = None):
        """ Store one parameter, or every entry of a dict, for all later hits; unknown names are ignored """
        if isinstance(name, dict):
            for key, item in name.items():
                try:
                    param, item = self.coerceParameter(key, item)
                    self.params[param] = item
                except KeyError:
                    pass
        elif isinstance(name, self._string_types):
            try:
                param, value = self.coerceParameter(name, value)
                self.params[param] = value
            except KeyError:
                pass

    def __getitem__(self, name):
        # Resolve the name only: casting a placeholder None raises for int/float parameters
        return self.params.get(self._parameter_name(name), None)

    def __setitem__(self, name, value):
        param, value = self.coerceParameter(name, value)
        self.params[param] = value

    def __delitem__(self, name):
        param = self._parameter_name(name)
        if param in self.params:
            del self.params[param]
def safe_unicode(obj):
    """ Safe conversion to the Unicode string version of the object.

        Byte strings are decoded as UTF-8; bytes that are not valid UTF-8
        become U+FFFD instead of raising. Any other object goes through the
        unicode constructor.
    """
    text_type = type(u'')  # `unicode` on Python 2
    if isinstance(obj, bytes):
        return obj.decode('utf-8', 'replace')
    try:
        return text_type(obj)
    except UnicodeDecodeError:
        # The object's string form holds non-ASCII bytes: decode that form
        return bytes(obj).decode('utf-8', 'replace')
# Declaring name mappings for Measurement Protocol parameters
MAX_CUSTOM_DEFINITIONS = 200
# The three bounds below are exclusive: they feed range(1, N), so N = 11 registers indices 1..10
MAX_EC_LISTS = 11 # 1-based index
MAX_EC_PRODUCTS = 11 # 1-based index
MAX_EC_PROMOTIONS = 11 # 1-based index

# General hit and session parameters
Tracker.alias(int, 'v', 'protocol-version')
Tracker.alias(safe_unicode, 'cid', 'client-id', 'clientId', 'clientid')
Tracker.alias(safe_unicode, 'tid', 'trackingId', 'account')
Tracker.alias(safe_unicode, 'uid', 'user-id', 'userId', 'userid')
Tracker.alias(safe_unicode, 'uip', 'user-ip', 'userIp', 'ipaddr')
Tracker.alias(safe_unicode, 'ua', 'userAgent', 'userAgentOverride', 'user-agent')
Tracker.alias(safe_unicode, 'dp', 'page', 'path')
# 'pageTitle' and 'page-title' are two aliases; a missing comma used to fuse them into one
Tracker.alias(safe_unicode, 'dt', 'title', 'pagetitle', 'pageTitle', 'page-title')
Tracker.alias(safe_unicode, 'dl', 'location')
Tracker.alias(safe_unicode, 'dh', 'hostname')
Tracker.alias(safe_unicode, 'sc', 'sessioncontrol', 'session-control', 'sessionControl')
Tracker.alias(safe_unicode, 'dr', 'referrer', 'referer')
Tracker.alias(int, 'qt', 'queueTime', 'queue-time')
Tracker.alias(safe_unicode, 't', 'hitType', 'hittype')
Tracker.alias(int, 'aip', 'anonymizeIp', 'anonIp', 'anonymize-ip')

# Campaign attribution
Tracker.alias(safe_unicode, 'cn', 'campaign', 'campaignName', 'campaign-name')
Tracker.alias(safe_unicode, 'cs', 'source', 'campaignSource', 'campaign-source')
Tracker.alias(safe_unicode, 'cm', 'medium', 'campaignMedium', 'campaign-medium')
Tracker.alias(safe_unicode, 'ck', 'keyword', 'campaignKeyword', 'campaign-keyword')
Tracker.alias(safe_unicode, 'cc', 'content', 'campaignContent', 'campaign-content')
Tracker.alias(safe_unicode, 'ci', 'campaignId', 'campaignID', 'campaign-id')

# Screen, viewport and locale
Tracker.alias(safe_unicode, 'sr', 'screenResolution', 'screen-resolution', 'resolution')
Tracker.alias(safe_unicode, 'vp', 'viewport', 'viewportSize', 'viewport-size')
Tracker.alias(safe_unicode, 'de', 'encoding', 'documentEncoding', 'document-encoding')
Tracker.alias(int, 'sd', 'colors', 'screenColors', 'screen-colors')
Tracker.alias(safe_unicode, 'ul', 'language', 'user-language', 'userLanguage')

# Application attributes
Tracker.alias(safe_unicode, 'an', 'appName', 'app-name', 'app')
Tracker.alias(safe_unicode, 'cd', 'contentDescription', 'screenName', 'screen-name', 'content-description')
Tracker.alias(safe_unicode, 'av', 'appVersion', 'app-version', 'version')
Tracker.alias(safe_unicode, 'aid', 'appID', 'appId', 'application-id', 'app-id', 'applicationId')
Tracker.alias(safe_unicode, 'aiid', 'appInstallerId', 'app-installer-id')

# Transactions and items
Tracker.alias(safe_unicode, 'ta', 'affiliation', 'transactionAffiliation', 'transaction-affiliation')
Tracker.alias(safe_unicode, 'ti', 'transaction', 'transactionId', 'transaction-id')
Tracker.alias(float, 'tr', 'revenue', 'transactionRevenue', 'transaction-revenue')
Tracker.alias(float, 'ts', 'shipping', 'transactionShipping', 'transaction-shipping')
Tracker.alias(float, 'tt', 'tax', 'transactionTax', 'transaction-tax')
Tracker.alias(safe_unicode, 'cu', 'currency', 'transactionCurrency', 'transaction-currency') # Currency code, e.g. USD, EUR
Tracker.alias(safe_unicode, 'in', 'item-name', 'itemName')
Tracker.alias(float, 'ip', 'item-price', 'itemPrice')
Tracker.alias(float, 'iq', 'item-quantity', 'itemQuantity')
Tracker.alias(safe_unicode, 'ic', 'item-code', 'sku', 'itemCode')
Tracker.alias(safe_unicode, 'iv', 'item-variation', 'item-category', 'itemCategory', 'itemVariation')

# Events
Tracker.alias(safe_unicode, 'ec', 'event-category', 'eventCategory', 'category')
Tracker.alias(safe_unicode, 'ea', 'event-action', 'eventAction', 'action')
Tracker.alias(safe_unicode, 'el', 'event-label', 'eventLabel', 'label')
Tracker.alias(int, 'ev', 'event-value', 'eventValue', 'value')
Tracker.alias(int, 'ni', 'noninteractive', 'nonInteractive', 'noninteraction', 'nonInteraction')

# Social interactions
Tracker.alias(safe_unicode, 'sa', 'social-action', 'socialAction')
Tracker.alias(safe_unicode, 'sn', 'social-network', 'socialNetwork')
Tracker.alias(safe_unicode, 'st', 'social-target', 'socialTarget')

# Exceptions
Tracker.alias(safe_unicode, 'exd', 'exception-description', 'exceptionDescription', 'exDescription')
Tracker.alias(int, 'exf', 'exception-fatal', 'exceptionFatal', 'exFatal')

# User timing
Tracker.alias(safe_unicode, 'utc', 'timingCategory', 'timing-category')
Tracker.alias(safe_unicode, 'utv', 'timingVariable', 'timing-variable')
Tracker.alias(int, 'utt', 'time', 'timingTime', 'timing-time')
Tracker.alias(safe_unicode, 'utl', 'timingLabel', 'timing-label')
Tracker.alias(float, 'dns', 'timingDNS', 'timing-dns')
Tracker.alias(float, 'pdt', 'timingPageLoad', 'timing-page-load')
Tracker.alias(float, 'rrt', 'timingRedirect', 'timing-redirect')
Tracker.alias(safe_unicode, 'tcp', 'timingTCPConnect', 'timing-tcp-connect')
Tracker.alias(safe_unicode, 'srt', 'timingServerResponse', 'timing-server-response')
# Custom dimensions and metrics
# Valid indices run from 1 to MAX_CUSTOM_DEFINITIONS inclusive; index 0 is
# registered only because earlier versions of this loop did so.
for i in range(0, MAX_CUSTOM_DEFINITIONS + 1):
    Tracker.alias(safe_unicode, 'cd{0}'.format(i), 'dimension{0}'.format(i))
    Tracker.alias(int, 'cm{0}'.format(i), 'metric{0}'.format(i))
# Enhanced Ecommerce
# Text parameters are cast with safe_unicode, like the rest of the alias
# table: a plain str()/unicode() cast raises on non-ASCII input.
Tracker.alias(safe_unicode, 'pa') # Product action
Tracker.alias(safe_unicode, 'tcc') # Coupon code
Tracker.alias(safe_unicode, 'pal') # Product action list
Tracker.alias(int, 'cos') # Checkout step
Tracker.alias(safe_unicode, 'col') # Checkout step option

Tracker.alias(safe_unicode, 'promoa') # Promotion action

for product_index in range(1, MAX_EC_PRODUCTS):
    Tracker.alias(safe_unicode, 'pr{0}id'.format(product_index)) # Product SKU
    Tracker.alias(safe_unicode, 'pr{0}nm'.format(product_index)) # Product name
    Tracker.alias(safe_unicode, 'pr{0}br'.format(product_index)) # Product brand
    Tracker.alias(safe_unicode, 'pr{0}ca'.format(product_index)) # Product category
    Tracker.alias(safe_unicode, 'pr{0}va'.format(product_index)) # Product variant
    Tracker.alias(safe_unicode, 'pr{0}pr'.format(product_index)) # Product price
    Tracker.alias(int, 'pr{0}qt'.format(product_index)) # Product quantity
    Tracker.alias(safe_unicode, 'pr{0}cc'.format(product_index)) # Product coupon code
    Tracker.alias(int, 'pr{0}ps'.format(product_index)) # Product position

    # Index 0 is kept for backward compatibility; MAX_CUSTOM_DEFINITIONS itself is a valid index
    for custom_index in range(MAX_CUSTOM_DEFINITIONS + 1):
        Tracker.alias(safe_unicode, 'pr{0}cd{1}'.format(product_index, custom_index)) # Product custom dimension
        Tracker.alias(int, 'pr{0}cm{1}'.format(product_index, custom_index)) # Product custom metric

    for list_index in range(1, MAX_EC_LISTS):
        Tracker.alias(safe_unicode, 'il{0}pi{1}id'.format(list_index, product_index)) # Product impression SKU
        Tracker.alias(safe_unicode, 'il{0}pi{1}nm'.format(list_index, product_index)) # Product impression name
        Tracker.alias(safe_unicode, 'il{0}pi{1}br'.format(list_index, product_index)) # Product impression brand
        Tracker.alias(safe_unicode, 'il{0}pi{1}ca'.format(list_index, product_index)) # Product impression category
        Tracker.alias(safe_unicode, 'il{0}pi{1}va'.format(list_index, product_index)) # Product impression variant
        Tracker.alias(int, 'il{0}pi{1}ps'.format(list_index, product_index)) # Product impression position
        # A price is a decimal amount: an int cast would truncate 12.99 to 12
        Tracker.alias(float, 'il{0}pi{1}pr'.format(list_index, product_index)) # Product impression price

        for custom_index in range(MAX_CUSTOM_DEFINITIONS + 1):
            Tracker.alias(safe_unicode, 'il{0}pi{1}cd{2}'.format(list_index, product_index, custom_index)) # Product impression custom dimension
            Tracker.alias(int, 'il{0}pi{1}cm{2}'.format(list_index, product_index, custom_index)) # Product impression custom metric

for list_index in range(1, MAX_EC_LISTS):
    Tracker.alias(safe_unicode, 'il{0}nm'.format(list_index)) # Product impression list name

for promotion_index in range(1, MAX_EC_PROMOTIONS):
    Tracker.alias(safe_unicode, 'promo{0}id'.format(promotion_index)) # Promotion ID
    Tracker.alias(safe_unicode, 'promo{0}nm'.format(promotion_index)) # Promotion name
    Tracker.alias(safe_unicode, 'promo{0}cr'.format(promotion_index)) # Promotion creative
    Tracker.alias(safe_unicode, 'promo{0}ps'.format(promotion_index)) # Promotion position
# Shortcut for creating trackers
def create(account, *args, **kwargs):
    """ Build a Tracker for the given account, forwarding every other argument unchanged """
    tracker = Tracker(account, *args, **kwargs)
    return tracker
454 # vim: set nowrap tabstop=4 shiftwidth=4 softtabstop=0 expandtab textwidth=0 filetype=python foldmethod=indent foldcolumn=4