Use Mozilla effective TLD data to determine the domain name on which to set the cookie

This commit is contained in:
Frédéric Péters 2010-08-15 15:16:26 +00:00
parent 12ba88bdbd
commit 472daed940
3 changed files with 4554 additions and 8 deletions

4449
data/vendor/effective_tld_names.dat vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -268,15 +268,18 @@ class QommonPublisher(Publisher):
if request:
canonical_hostname = request.get_server(clean = False).lower().split(':')[0].rstrip('.')
if canonical_hostname.count('.') >= 2:
# somehow hostname could be an IP address, we avoid setting up
# a bogus domain for cookie (127.0.0.1 -> 0.1) by checking if
# last part is numerical.
last_part = canonical_hostname.split('.')[-1]
if canonical_hostname.count('.') >= 2 and self.etld:
try:
int(last_part)
except ValueError:
self.config.session_cookie_domain = '.'.join(canonical_hostname.split('.')[-2:])
socket.inet_aton(canonical_hostname)
except socket.error:
# not an IP address
try:
base_name = self.etld.parse(canonical_hostname)[1]
except:
pass
else:
self.config.session_cookie_domain = '.'.join(
canonical_hostname.split('.')[-2-base_name.count('.'):])
md5_hash = md5_new()
md5_hash.update(self.app_dir)
@ -445,11 +448,20 @@ class QommonPublisher(Publisher):
# started more than two days ago, probably aborted job
job.remove_self()
def load_effective_tld_names(cls):
    """Load the vendored effective TLD rule file into ``cls.etld``.

    The rule file is looked up as ``vendor/effective_tld_names.dat`` under
    ``cls.DATA_DIR``.  When the file is present, ``cls.etld`` is set to an
    ``etld.etld`` helper built from it; when it is absent, ``cls.etld`` is
    set to None and nothing else is attempted.
    """
    filename = os.path.join(cls.DATA_DIR, 'vendor', 'effective_tld_names.dat')
    if not os.path.exists(filename):
        cls.etld = None
        # Stop here: without this return the code fell through and tried to
        # build the helper from the missing file, which raised at startup
        # instead of leaving the feature disabled.
        return
    # Imported lazily so the vendored module is only needed when the data
    # file is actually installed.
    from vendor import etld
    cls.etld = etld.etld(filename)
load_effective_tld_names = classmethod(load_effective_tld_names)
def create_publisher(cls):
cls.load_extra_dirs()
cls.load_translations()
cls.register_cronjob(CronJob(cls.clean_sessions, minutes = range(0, 60, 5)))
cls.register_cronjob(CronJob(cls.clean_afterjobs, minutes = [0]))
cls.load_effective_tld_names()
publisher = cls(cls.root_directory_class(),
session_cookie_name = cls.APP_NAME,

85
wcs/qommon/vendor/etld.py vendored Normal file
View File

@ -0,0 +1,85 @@
#!/usr/bin/python
# Copyright (c) 2009 Michael Still
# Released under the terms of the GNU GPL v2
# Mozilla publishes a rule file which may be used to calculate effective TLDs
# at:
#
# http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/src/
# effective_tld_names.dat?raw=1
#
# Use that file to take a domain name and return a (domain, etld) tuple.
# Documentation for the rule file format is at:
#
# https://wiki.mozilla.org/Gecko:Effective_TLD_Service
import re
import sys
import time
class etld(object):
    """Helper to determine the effective TLD portion of a domain name.

    Rules are read from a text file with one rule per line; blank lines and
    lines starting with '//' are ignored.  Each rule is compiled to a regular
    expression that is matched against the *reversed* hostname.
    """

    def __init__(self, datafile='effective_tld_names.dat'):
        """Load the data file ready for lookups.

        datafile -- path of the rule file to read.

        Any error raised by open() for an unreadable file propagates to the
        caller.
        """
        # Compiled rules, grouped by the last label of the rule (the plain
        # TLD) so parse() only has to try the rules relevant to a hostname.
        self.rules = {}
        handle = open(datafile)
        # try/finally rather than a with-block: the handle is closed even if
        # a rule fails to compile, and no newer syntax is required.
        try:
            for line in handle:
                line = line.rstrip()
                if line and not line.startswith('//'):
                    tld = line.split('.')[-1]
                    self.rules.setdefault(tld, []).append(
                        re.compile(self.regexpize(line)))
        finally:
            handle.close()

    def regexpize(self, line):
        """Convert a rule to regexp syntax.

        The rule is reversed character by character, dots are escaped, '*'
        becomes "any run of non-dot characters" and '!' is dropped.  The
        returned pattern captures the (reversed) suffix in group 1 and the
        (reversed) remainder of the hostname in group 2.
        """
        # NOTE(review): '!' exception rules are compiled like ordinary rules
        # because the marker is simply removed -- confirm against the rule
        # file format documentation whether callers need them treated
        # differently.
        line = line[::-1].replace('.', '\\.').replace('*', '[^\\.]*').replace('!', '')
        return r'^(%s)\.(.*)$' % line

    def parse(self, hostname):
        """Parse a hostname into domain and etld portions.

        Returns a (domain, etld) tuple where etld is the longest suffix
        matched by any rule and domain is everything left of it.

        Raises Exception('Parse failed') when no rule matches, including
        when the hostname's last label has no rules at all.
        """
        hostname = hostname.lower()
        tld = hostname.split('.')[-1]
        reversed_name = hostname[::-1]
        domain = ''
        suffix = ''
        # .get() so an unknown TLD ends in the documented 'Parse failed'
        # error instead of a bare KeyError.
        for rule in self.rules.get(tld, ()):
            m = rule.match(reversed_name)
            # Keep the longest matching suffix.  Compare lengths: comparing
            # the reversed match text with the stored (un-reversed) suffix
            # lexicographically picked the wrong rule.
            if m and len(m.group(1)) > len(suffix):
                domain = m.group(2)[::-1]
                suffix = m.group(1)[::-1]
        if not suffix:
            raise Exception('Parse failed')
        return (domain, suffix)
if __name__ == '__main__':
e = etld()
f = open(sys.argv[1])
l = f.readline()
start_time = time.time()
while l:
try:
l = l.rstrip()
print '%s -> %s' %(l, e.parse(l))
except Exception, ex:
print ex
l = f.readline()
print 'Took %f seconds' % (time.time() - start_time)
f.close()