# Copyright (c) 2006 Goran Rakic <grakic@devbase.net>.
#
# This file is part of libgo.
#
# libgo is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# libgo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with libgo; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
import sys, ftplib, os, md5, time, shutil, types, glob
from os.path import isfile, isdir, dirname, basename, abspath, join
from datetime import datetime
from stat import ST_MTIME

import utils

class gftpDriver:
    """
    GNOME FTP driver - fetch files from stable release tarballs
    on a mirror of ftp.gnome.org
    """
    def __init__(self, cachedir, stamp, verbose=False,
                 server='ftp.gnome.org', path='/pub/GNOME/'):
        """
        gftpDriver(cachedir, stamp, [verbose, server, path]) where:

        - cachedir is the location of the cache where files are written.
          The gdu and gtkdoc locators use outputdir/.cache/gnome as the
          cachedir path. gftp will use cachedir/../gftp for the archives cache
        - stamp is a name suffix to use for the stamp file. As more than one
          locator can share the same cachedir, they must use different
          stamp files to know when files were updated from an archive
          and if the archive needs to be extracted again. Stamp files are
          written to cachedir/module/release/stamp-f[stamp].
        - server is the FTP server (default ftp.gnome.org)
        - path is the path to a mirror of the /pub/GNOME/ directory on server
        - if verbose is True, gftp will print a line from time to time

        Note: If you are using some FTP server other than the default, make
        sure that the __dirparser method is working correctly!
        """
        self.cachedir = cachedir
        self.gftpcachedir = abspath(join(cachedir, '..', 'gftp'))
        self.stamp = 'stamp-f' + stamp
        self.verbose = verbose

        if not isdir(self.gftpcachedir):
            os.makedirs(self.gftpcachedir)
        if not isdir(join(self.gftpcachedir, 'tmp')):
            os.makedirs(join(self.gftpcachedir, 'tmp'))

        self.server = server
        self.path = path
        self.sock = None

        self.items = {}         # {(module,release): datetime of last mod}
        self.updated = set()    # items (module,release) for which
                                # __updatearchive was called in this run
        self.extracted = set()  # items (module,release) that should
                                # reflect what is in .cache/gftp/tmp

    def __del__(self):
        "Disconnects from FTP and clears the gftp tmp cache"
        self.__disconnect()
        # Iterate over a copy: clearCache removes items from self.extracted
        for module, release in list(self.extracted):
            self.__clearprogress()
            print >> sys.stderr, " Warning: Cleaning %s (%s release) from tmp cache" % (module, release)
            self.clearCache(module, release)

    def __connect(self):
        "Open a connection to the FTP server"
        try:
            self.sock = ftplib.FTP(self.server)
            self.sock.login()
        except ftplib.all_errors, e:
            self.__clearprogress()
            raise IOError, "Unable to open connection (%s)" % e

    def __disconnect(self):
        """
        Close the FTP link; the link is re-established if it is needed
        again. It is called from the destructor if the link is still active.
        """
        if self.sock:
            try:
                self.sock.quit()
            except ftplib.all_errors:
                pass
            # Clear the socket even if quit() failed, so we don't reuse it
            self.sock = None

    # Print fancy progress while browsing FTP
    __progressactive = False
    __progressprinted = False
    __progressmsg = False

    def __clearprogress(self):
        if gftpDriver.__progressprinted:
            gftpDriver.__progressprinted = False
            print

    def __startprogress(self, msg=None):
        if msg:
            gftpDriver.__progressmsg = True
            print msg,
        gftpDriver.__progressactive = True

    def __stopprogress(self):
        if gftpDriver.__progressmsg:
            print "done"
        else:
            print
        gftpDriver.__progressactive = False

    def __printprogress(self):
        if self.verbose and gftpDriver.__progressactive:
            print '.',
            gftpDriver.__progressprinted = True

    __lstcounter = 0

    def __nlistdir(self, path):
        if self.verbose:
            if not gftpDriver.__lstcounter % 15:
                self.__printprogress()
            gftpDriver.__lstcounter += 1
        self.sock.cwd(path)
        l = self.sock.nlst()
        self.sock.cwd('/')
        return l

    def __listdir(self, path):
        if self.verbose:
            if not gftpDriver.__lstcounter % 15:
                self.__printprogress()
            gftpDriver.__lstcounter += 1
        lines = []
        self.sock.dir(path, lines.append)
        return [self.__dirparser(line) for line in lines]

    __months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def __dirparser(self, line):
        """
        Parse a line returned from the LIST call.
        If you are using some other FTP server, make sure
        that __dirparser is working correctly!
        """
        words = line.split(None, 8)
        if len(words) < 6:
            raise ValueError, "Directory listing line is too short (%s)" % line
        filename = words[-1].lstrip()
        i = filename.find(" -> ")
        if i >= 0:
            # words[0] had better start with 'l'...
            filename = filename[:i]
        isdir = words[0][0] == 'd'
        # Convert the month name to a number so callers can build a datetime
        words[-4] = gftpDriver.__months.index(words[-4]) + 1
        return filename, isdir, words[-4:-1]
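
    # For illustration, a hypothetical GNU-style listing line such as
    #     "drwxr-xr-x   2 ftp ftp 4096 Mar 14 12:30 2.14"
    # is parsed by __dirparser into ('2.14', True, [3, '14', '12:30']),
    # i.e. (filename, isdir, [month, day, time-or-year]).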

    __default_starting_date = datetime(1970, 1, 1)

    def __liststable(self, path, starting=None):
        """
        Return a list of stable major.minor releases. If starting is a
        release string or a datetime object, only newer releases, if any,
        are returned. The latest stable release (if any) is appended to
        the end of the list. The list is not ordered.
        """
        releases = []
        smaj = 0
        smin = 0
        date = gftpDriver.__default_starting_date
        if type(starting) == types.StringType:
            sg, sv = starting.split('.')
            if sg.isdigit() and sv.isdigit():
                smaj = int(sg)
                smin = int(sv)
        elif starting:
            date = starting
        lmaj = smaj
        lmin = smin
        for (f, d, t) in self.__listdir(path):
            if d and '.' in f:
                sg, sv = f.split('.')
                if sg.isdigit() and sv.isdigit():
                    maj = int(sg)
                    min = int(sv)
                    # Stable GNOME releases have an even minor number
                    if min % 2 == 0 and maj > 0:
                        if maj > smaj or (maj == smaj and min >= smin):
                            if ':' in t[-1]:
                                h, m = t[2].split(':')
                                y = time.gmtime()[0]
                            else:
                                h, m = 0, 0
                                y = t[2]
                            # This is in UTC
                            ms = datetime(int(y), int(t[0]),
                                          int(t[1]), int(h), int(m))
                            if ms >= date:
                                # Only track this release as the latest if it
                                # really is newer; listings may be unordered
                                if maj > lmaj or (maj == lmaj and min > lmin):
                                    lmaj = maj
                                    lmin = min
                                releases.append(f)
        if releases:
            # there must be at least one - smaj.smin and it is the newest
            releases.append(str(lmaj) + '.' + str(lmin))
        return releases

    def __isgreatermicro(self, old, new):
        "Returns True if the new micro release is greater than the old one"
        inew = []
        for n in new:
            if not n.isdigit():
                return False
            inew.append(int(n))
        if not old:
            return True
        iold = [int(o) for o in old]
        # Pad the shorter version with zeros so both compare component-wise
        pad = [0] * abs(len(old) - len(new))
        if pad:
            if len(old) > len(new):
                inew += pad
            else:
                iold += pad
        for i in range(len(inew)):
            if inew[i] > iold[i]:
                return True
            elif inew[i] < iold[i]:
                # An earlier, more significant component decides
                return False
        return False
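
    # For example, comparing micro versions component-wise,
    # __isgreatermicro(['2'], ['10']) returns True since 10 > 2, while
    # __isgreatermicro(['2', '1'], ['2']) pads the new version to
    # ['2', '0'] and returns False since 0 < 1.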

    def __latestmicro(self, module, release):
        """
        Get the latest micro release and create the
        self.items[(module,release)] datetime object from the last mod
        time of the archive in the listing
        """
        latest, latest_time = None, None
        path = self.path + 'sources/' + module + '/' + release
        try:
            lines = self.__listdir(path)
        except ftplib.all_errors + (ValueError,), e:
            raise RuntimeError, "Unable to list archives (%s)" % e
        for (f, d, t) in lines:
            if f[-7:] == '.tar.gz':
                version = f[f.rfind('-'):-7]  # from last '-' to '.tar.gz'
                m = version.split('.')[2:]    # without major.minor
                if self.__isgreatermicro(latest, m):
                    latest, latest_time = m, t
        # There are no archives in the directory
        if latest is None:
            raise RuntimeError, "There are no archives for this release"
        else:
            try:
                t = latest_time
                if ':' in t[-1]:
                    h, m = t[2].split(':')
                    y = time.gmtime()[0]
                else:
                    h, m = 0, 0
                    y = t[2]
                # This is in UTC
                ms = datetime(int(y), int(t[0]), int(t[1]), int(h), int(m))
            except ValueError, e:
                raise RuntimeError, "Failed creating last mod datetime: %s" % e
            self.items[(module, release)] = ms
            latest = ".".join(latest)
        return latest

    def __md5(self, filename):
        "Calculate the md5 sum of a file"
        md5h = md5.new()
        # Open in binary mode: the archives are gzipped tarballs
        a = open(filename, 'rb')
        try:
            while True:
                buf = a.read(4096)
                if not buf:
                    break
                md5h.update(buf)
            return md5h.hexdigest()
        finally:
            a.close()

    def __getmd5(self, module, release, micro):
        """
        Get the MD5 hash for an archive.
        Raises RuntimeError if no checksum can be found.
        """
        h = None
        sarchive = module + '-' + release
        if micro: sarchive += '.' + micro
        f1 = self.path + 'sources/' + module + '/' + release + '/' + sarchive
        sarchive += '.tar.gz'
        # 1: normal 2: scrollkeeper,gtk-engines 3: gtkhtml
        for f in (f1 + '.md5sum', f1 + '.tar.gz.md5', f1 + '.tar.md5sum'):
            md5h = []
            try:
                self.sock.retrlines('RETR ' + f, md5h.append)
            except ftplib.error_perm:
                pass  # move to the next candidate
            else:
                for l in md5h:
                    try:
                        ha, fi = l.split()
                    except ValueError:
                        raise RuntimeError, "Invalid line in %s" % basename(f)
                    if sarchive == fi:
                        h = ha
                if not h:
                    raise RuntimeError, "Archive missing from %s" % basename(f)
                else:
                    return h
        # still no luck?
        if not h:
            raise RuntimeError, "MD5 file missing"
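
    # A checksum file is expected to contain standard md5sum output,
    # e.g. (hypothetical content of a gedit-2.14.4.tar.gz.md5 file):
    #     d41d8cd98f00b204e9800998ecf8427e  gedit-2.14.4.tar.gz
    # so each line splits into a (hash, filename) pair.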

    def __updatearchive(self, module, release):
        "Update an archive in the gftp cache"
        try:
            micro = self.__latestmicro(module, release)
        except RuntimeError, e:
            raise RuntimeError, "Unable to find latest micro: %s" % e

        archive = module + '-' + release    # module:  gedit
        sarchive = archive                  # release: 2.14
        if micro: sarchive += '.' + micro   # micro:   4

        # tmp archive, archive in cache
        tmp = join(self.gftpcachedir, 'tmp', archive + '.tar.gz')  # tmp/gedit-2.14.tar.gz
        arc = join(self.gftpcachedir, archive + '.tar.gz')         # gedit-2.14.tar.gz

        refresh = True
        ms = self.items[(module, release)]
        if isfile(tmp):
            print >> sys.stderr, " Warning: Archive %s.tar.gz is still in the archives cache.\n There must have been a crash during the last update, refreshing" % sarchive
        elif isfile(arc):
            # Do we have an up to date archive in the gftp cache?
            # syshackers told me not to depend on micro numbers
            # and this makes things easier (I don't need to remove old micros)
            md = datetime.utcfromtimestamp(os.stat(arc)[ST_MTIME])
            if md >= ms:
                refresh = False

        if refresh:
            f = self.path + 'sources/' + module + '/' + release + '/' + sarchive + '.tar.gz'
            if self.verbose:
                print "> RETR %s" % f
            # Open before the try block so a.close() in the except
            # clause cannot fail with a NameError
            a = open(tmp, 'wb')
            try:
                self.sock.retrbinary('RETR ' + f, a.write)
            except:
                a.close()
                if isfile(tmp): os.unlink(tmp)
                raise
            else:
                a.close()

            try:
                md5d = self.__md5(tmp)
                md5s = self.__getmd5(module, release, micro)
            except ftplib.all_errors + (RuntimeError,), e:
                raise IOError, "Unable to get MD5 checksums: %s" % e
            if md5d != md5s:
                if isfile(tmp): os.unlink(tmp)
                raise RuntimeError, "MD5 mismatch"
            else:
                os.rename(tmp, arc)

        self.updated.add((module, release))

    def __extractarchive(self, module, release):
        "Tries to extract an archive to the tmp cache"
        archive = module + '-' + release
        dst = join(self.gftpcachedir, 'tmp', archive)       # tmp/gedit-2.14
        arc = join(self.gftpcachedir, archive + '.tar.gz')  # gedit-2.14.tar.gz
        os.makedirs(dst)
        cmd = ['tar', '--strip-components=1', '-C', dst, '-xzf', arc]
        error = utils.cmd(cmd, self.verbose)
        if error:
            # clean up so the archive can be refetched
            if isfile(arc): os.unlink(arc)
            if isdir(dst): shutil.rmtree(dst, True)
            raise RuntimeError, "tar command crashed, error output:\n%s" % error
        # Write down that the archive is extracted. This is useful when more
        # than one locator is using the same cache, as we can check this file
        # to see if we need to extract the archive again
        stampdir = join(self.cachedir, module, release)
        if not isdir(stampdir):
            os.makedirs(stampdir)
        utils.touch(join(stampdir, self.stamp))
        # Add it so we can clean up later
        self.extracted.add((module, release))
        return 0

    def clearCache(self, module, release):
        """
        clearCache(module, release)

        Clear an extracted archive from the gftp tmp cache. If not cleared
        before, the cache is cleared when gftp's destructor is called.
        """
        if (module, release) in self.extracted:
            d = join(self.gftpcachedir, 'tmp', module + '-' + release)
            if isdir(d):
                shutil.rmtree(d, True)
            self.extracted.remove((module, release))

    def getGnomeItems(self, sections=None, starting=None):
        """
        getGnomeItems(sections = None, starting = None)

        Returns a tuple of two dictionaries, both with a GNOME release as
        the key and a set of (module,release) items as the value:
        ({gnome_release: set([(module,release), ...])}, ...)
        The first one is for past GNOME releases, and the second is for
        the latest GNOME release, where the latest release is the latest
        stable major.minor number found under /pub/GNOME/[sections]/.

        If starting is a major.minor release (like '2.8') only later or
        equal releases are returned (which can be an empty list). starting
        can also be a datetime object.

        sections can be a list or tuple of GNOME sections of interest
        (like 'admin' or 'desktop'); if it is None, all supported sections
        are browsed. A hack for supporting listing of items from the
        'bindings' section is sketched in the commented block below.
        """
        self.__startprogress(" Listing GNOME modules")
        items = {}
        lmaj = 0
        lmin = 0
        if sections is None:
            # Browse all currently supported sections by default
            sections = ('admin', 'desktop', 'platform')
        try:
            self.__connect()
            for section in sections:
                # Don't accept 'sources' or 'teams',
                # and about 'bindings', see the FIXME comment below
                if section not in ('admin', 'desktop', 'platform'):
                    print >> sys.stderr, " Warning: Browsing of %s is not supported." % section
                    continue
                releases = self.__liststable(self.path + section, starting)
                for release in releases[:-1]:
                    if release not in items:
                        items[release] = set()
                    if release == releases[-1]:
                        g, v = release.split('.')
                        maj = int(g)
                        min = int(v)
                        if maj > lmaj or (maj == lmaj and min > lmin):
                            lmaj = maj
                            lmin = min

                    # Find the minor version of this section
                    path = self.path + section + '/' + release + '/'
                    latest = 0
                    for f in self.__nlistdir(path):
                        m = f[f.rfind('.') + 1:]
                        if m.isdigit() and int(m) > latest:
                            latest = int(m)
                    path += release + '.' + str(latest) + '/sources/'

                    # FIXME: Currently there is no locator for bindings API
                    # references, and there are some problems with Perl
                    # bindings. There are way too many "hacks", so listing
                    # of bindings must be defined as another method
                    #
                    # Hack to support listing of bindings
                    # subsections = ['']
                    # if section == 'bindings':
                    #     subsections = self.__nlistdir(path)
                    #     subsections.remove('MD5SUMS-for-bz2')
                    #     subsections.remove('MD5SUMS-for-gz')
                    #
                    #     # Perl modules are not in /sources and they are
                    #     # using "strange" version numbers. I can add
                    #     # something like getPerlBindingsItems or make
                    #     # public general methods like __listdir when a
                    #     # locator for perldoc is written...
                    #     subsections.remove('perl')
                    #
                    # for subs in subsections:
                    #     spath = path
                    #     if subs: spath += subs
                    #     # now use spath instead of path in the for loop

                    for f in self.__nlistdir(path):
                        if f[-7:] == '.tar.gz':
                            # gedit-2.14.3.tar.gz -> gedit-2.14
                            i = f.rfind('-')
                            module = f[:i]
                            ver = '.'.join(f[i + 1:-7].split('.')[:2])
                            items[release].add((module, ver))
        except ftplib.all_errors + (ValueError,), e:
            self.__disconnect()
            self.__stopprogress()
            raise RuntimeError, "Unable to list GNOME modules: %s" % e
        else:
            self.__disconnect()
            self.__stopprogress()

        if lmaj > 0:
            latest_release = str(lmaj) + '.' + str(lmin)
            latest_items = {latest_release: items[latest_release]}
            del items[latest_release]
        else:
            raise RuntimeError, "Unable to locate latest stable GNOME release"
        return items, latest_items
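
    # A sketch of the expected return shape (module names hypothetical):
    #     past, latest = driver.getGnomeItems(('desktop',), starting='2.10')
    #     # past   -> {'2.10': set([('gedit', '2.10'), ...]), '2.12': ...}
    #     # latest -> {'2.14': set([('gedit', '2.14'), ...])}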

    def getItems(self, starting=None):
        """
        getItems(starting = None)

        Returns a tuple of two sets. One is the set of stable items
        (module,release) without the latest stable releases, and the
        other is the set of latest stable releases of items.

        If starting is a datetime object, only releases after that time,
        if any, are returned. The latest stable release (if any) is
        appended to the end. starting can also be a major.minor release
        string.
        """
        self.__startprogress(" Listing modules")
        items = set()
        latest_items = set()
        try:
            self.__connect()
            for f in self.__nlistdir(self.path + 'sources'):
                releases = self.__liststable(self.path + 'sources/' + f, starting)
                for release in releases[:-1]:
                    if release == releases[-1]:
                        latest_items.add((f, release))
                    else:
                        items.add((f, release))
        except ftplib.all_errors + (ValueError,), e:
            self.__disconnect()
            self.__stopprogress()
            raise RuntimeError, "Unable to list modules: %s" % e
        else:
            self.__stopprogress()
            self.__disconnect()
        return items, latest_items
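
    # For example (hypothetical data), getItems('2.12') might return
    #     (set([('gedit', '2.12')]), set([('gedit', '2.14')]))
    # with past stable items first and the latest stable items second.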

    def getLastUpdate(self, module, files, release):
        """
        getLastUpdate(self, module, files, release)

        Return the latest update Unix timestamp of the files in the list.
        If the list is empty or all files are missing, 0 is returned.
        """
        if type(files) == types.StringType:
            files = [files]
        last = 0
        # /gedit-2.14/Makefile.am -> /gedit/2.14/Makefile.am
        src = join(self.cachedir, module, release)
        # we used copy2 to keep mod times from the tarballs
        for f in files:
            if isfile(join(src, f)):
                # FIXME: Is this UTC???
                # Or is it in whatever timezone the developer is using
                ms = os.stat(join(src, f))[ST_MTIME]
                if ms > last:
                    last = ms
        return last

    def updateFiles(self, module, files, release):
        """
        updateFiles(module, files, release)

        Request a list of files to be updated and stored in the cache;
        returns a list of updated files.

        First, updateFiles checks if the item (module, release) is already
        extracted in the gftp tmp cache (the item is included in
        self.extracted). If it is not, it will try to update it, first
        checking if the item was not already updated (included in
        self.updated); if it was not, it calls the __updatearchive method.
        That method checks if the archive on the FTP server is newer than
        the archive in the gftp cache (if one exists) and, if it is,
        fetches the archive, validates it against its MD5 sum and moves it
        to the gftp cache. If the MD5 check fails or the archive cannot be
        fetched, an error is raised.

        Next, updateFiles checks if the archive in the gftp cache is newer
        than the stamp of the last update of sources in the cache; if it
        is, it calls the __extractarchive method to extract the archive to
        the gftp tmp cache, include it in self.extracted and update the
        stamp file.

        If the archive is extracted, updateFiles checks for every file
        whether it needs to be updated. If it does, the file is copied to
        the cache (preserving its last mod time) and included in the list
        of updated files.

        The extracted archive is preserved in the gftp cache until
        clearCache or gftp's destructor is called.
        """
        if type(files) == types.StringType:
            files = [files]

        archive = module + '-' + release
        arc = join(self.gftpcachedir, archive + '.tar.gz')
        src = join(self.gftpcachedir, 'tmp', archive)
        dst = join(self.cachedir, module, release)

        # If the archive is not extracted...
        if not (module, release) in self.extracted:

            # If the archive is still in the tmp cache, then the last update
            # from it must have failed. We don't know which locator failed,
            # so we remove all stamps. Now, whichever it was, it will extract
            # the archive again next time. This will also include this
            # request...
            if isdir(src):
                print >> sys.stderr, " Warning: Archive %s.tar.gz is still extracted in tmp cache (%s).\n There must have been a crash during the last extraction, cleaning" % (archive, src)
                shutil.rmtree(src, True)
                for stamp in glob.glob(join(self.cachedir, '*', '*', 'stamp-f*')):
                    os.unlink(stamp)

            # We want to call __updatearchive only once for each item
            if not (module, release) in self.updated:
                try:
                    self.__connect()
                    self.__updatearchive(module, release)
                except ftplib.all_errors + (RuntimeError,), e:
                    self.__disconnect()
                    raise RuntimeError, "Unable to update %s.tar.gz:\n%s" % (archive, e)
                else:
                    self.__disconnect()

            # If the archive has a newer timestamp than our stamp file,
            # extract it
            extract = True
            # This is in local time
            arcs = os.stat(arc)[ST_MTIME]
            if isfile(join(dst, self.stamp)):
                arcd = os.stat(join(dst, self.stamp))[ST_MTIME]
                if arcd >= arcs:
                    extract = False
            if extract:
                try:
                    self.__extractarchive(module, release)
                except (IOError, RuntimeError), e:
                    raise RuntimeError, "Unable to extract %s.tar.gz:\n%s" % (archive, e)

        updated_files = []
        # If it is extracted, update the files
        if (module, release) in self.extracted:

            # Sanity check; src/* can still be damaged, but who cares :)
            if not isdir(src):
                self.extracted.remove((module, release))
                raise RuntimeError, "Module %s (%s release) is included in the list of extracted archives, but the directory is missing from the tmp cache." % (module, release)

            for g in files:
                found = glob.glob(join(src, g))
                existing = glob.glob(join(dst, g))
                for f in found:
                    f = f[len(src):].lstrip(os.sep)
                    update = True
                    # See the FIXME note on getLastUpdate
                    # Is this UTC or is it in the dev's timezone?
                    ms = os.stat(join(src, f))[ST_MTIME]
                    if isfile(join(dst, f)):
                        md = os.stat(join(dst, f))[ST_MTIME]
                        if md > ms:
                            print >> sys.stderr, " Warning: Newer timestamp in source than in destination, update forced\n Module %s, file: %s" % (module, f)
                        elif md == ms:
                            update = False
                    if update:
                        try:
                            if not isdir(join(dst, dirname(f))):
                                os.makedirs(join(dst, dirname(f)))
                            shutil.copy2(join(src, f), join(dst, f))
                        except IOError:
                            if isfile(join(dst, f)):
                                os.unlink(join(dst, f))
                            raise
                        updated_files.append(f)
                # Remove nonexisting files; this won't prune directories...
                for f in existing:
                    f = f[len(dst):].lstrip(os.sep)
                    if join(src, f) not in found:
                        os.unlink(join(dst, f))
        return updated_files
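

# A minimal usage sketch, runnable only against a reachable mirror of
# ftp.gnome.org; the cache path, 'demo' stamp name, section list and
# starting release below are hypothetical.
if __name__ == '__main__':
    driver = gftpDriver('output/.cache/gnome', 'demo', verbose=True)
    # List past and latest stable (module, release) items per GNOME release
    past, latest = driver.getGnomeItems(('desktop',), starting='2.12')
    for gnome_release, modules in latest.items():
        print "GNOME %s:" % gnome_release
        for module, release in modules:
            print "  %s %s" % (module, release)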