Rewrite ptl_compile to work with Python 3.

Support new compiled module layout (__pycache__).  Fix
compile_package() so it can be used for backwards compatibility
with Quixote applications using it.
This commit is contained in:
Neil Schemenauer 2016-05-17 20:25:09 +00:00
parent 0911f3761b
commit 334301fded
3 changed files with 460 additions and 251 deletions

View File

@ -1,286 +1,337 @@
#!/www/python/bin/python
#!/usr/bin/env python
"""Compile a PTL template.
First template function names are mangled, noting the template type.
Next, the file is parsed into a parse tree. This tree is converted into
a modified AST. It is during this state that the semantics are modified
by adding extra nodes to the tree. Finally bytecode is generated using
the compiler.
"""
import ast
import sys
import os
import stat
import re
import imp
import marshal
import struct
import importlib.util
import py_compile
from .ptl_import import PTL_EXT, PTLFileLoader
from .ptl_parse import parse
HTML_TEMPLATE_PREFIX = "_q_html_template_"
PLAIN_TEMPLATE_PREFIX = "_q_plain_template_"
class TemplateTransformer(ast.NodeTransformer):
def __init__(self, *args, **kwargs):
ast.NodeTransformer.__init__(self, *args, **kwargs)
# __template_type is a stack whose values are
# "html", "plain", or None
self.__template_type = [None]
def ptl_compile(file, cfile, dfile=None, doraise=False, optimize=-1):
"""Byte-compile one PTL source file to Python bytecode.
def _get_template_type(self):
"""Return the type of the function being compiled (
"html", "plain", or None)
"""
if self.__template_type:
return self.__template_type[-1]
else:
return None
:param file: The source file name.
:param cfile: The target byte compiled file name.
:param dfile: Purported file name, i.e. the file name that shows up in
error messages. Defaults to the source file name.
:param doraise: Flag indicating whether or not an exception should be
raised when a compile error is found. If an exception occurs and this
flag is set to False, a string indicating the nature of the exception
will be printed, and the function will return to the caller. If an
exception occurs and this flag is set to True, a PyCompileError
exception will be raised.
:param optimize: The optimization level for the compiler. Valid values
are -1, 0, 1 and 2. A value of -1 means to use the optimization
level of the current interpreter, as given by -O command line options.
def visit_Module(self, node):
html_imp = ast.ImportFrom(module='quixote.html',
names=[ast.alias(name='TemplateIO',
asname='_q_TemplateIO'),
ast.alias(name='htmltext',
asname='_q_htmltext')],
level=0)
ast.fix_missing_locations(html_imp)
vars_imp = ast.ImportFrom(module='builtins',
names=[ast.alias(name='vars',
asname='_q_vars')], level=0)
ast.fix_missing_locations(vars_imp)
ptl_imports = [vars_imp, html_imp]
# skip __future__ statements
idx = 0
for i, stmt in enumerate(node.body):
if isinstance(stmt, ast.ImportFrom) and stmt.module == '__future__':
idx = i + 1
node.body[idx:idx] = ptl_imports
return self.generic_visit(node)
:return: Path to the resulting byte compiled file.
def visit_FunctionDef(self, node):
name = node.name
if not re.match('_q_(html|plain)_template_', name):
# just a normal function
self.__template_type.append(None)
node = self.generic_visit(node)
else:
if name.startswith(PLAIN_TEMPLATE_PREFIX):
node.name = name[len(PLAIN_TEMPLATE_PREFIX):]
template_type = "plain"
elif name.startswith(HTML_TEMPLATE_PREFIX):
node.name = name[len(HTML_TEMPLATE_PREFIX):]
template_type = "html"
else:
raise RuntimeError('unknown prefix on %s' % name)
self.__template_type.append(template_type)
node = self.generic_visit(node)
# _q_output = _q_TemplateIO()
klass = ast.Name(id='_q_TemplateIO', ctx=ast.Load())
arg = ast.NameConstant(template_type == 'html')
instance = ast.Call(func=klass, args=[arg], keywords=[],
starargs=None, kwargs=None)
assign_name = ast.Name(id='_q_output', ctx=ast.Store())
assign = ast.Assign(targets=[assign_name], value=instance)
ast.copy_location(assign, node)
ast.fix_missing_locations(assign)
node.body.insert(0, assign)
# return _q_output.getvalue()
n = ast.Name(id='_q_output', ctx=ast.Load())
n = ast.Attribute(value=n, attr='getvalue', ctx=ast.Load())
n = ast.Call(func=n, args=[], keywords=[], starargs=None,
kwargs=None)
ret = ast.Return(value=n)
ast.copy_location(ret, node.body[-1])
ast.fix_missing_locations(ret)
node.body.append(ret)
self.__template_type.pop()
return node
def visit_Expr(self, node):
if self._get_template_type() is not None:
node = self.generic_visit(node)
# Instead of discarding objects on the stack, call
# "_q_output += obj".
lval = ast.Name(id='_q_output', ctx=ast.Store())
ast.copy_location(lval, node)
aug = ast.AugAssign(target=lval, op=ast.Add(), value=node.value)
return ast.copy_location(aug, node)
else:
return node
def visit_Str(self, node):
if "html" == self._get_template_type():
n = ast.Name(id='_q_htmltext', ctx=ast.Load())
ast.copy_location(n, node)
n = ast.Call(func=n, args=[node], keywords=[], starargs=None,
kwargs=None)
return ast.copy_location(n, node)
else:
return node
_template_re = re.compile(r'''
^(?P<indent>[ \t]*) def (?:[ \t]+)
(?P<name>[a-zA-Z_][a-zA-Z_0-9]*)
(?:[ \t]*) \[(?P<type>plain|html)\] (?:[ \t]*)
(?:[ \t]*[\(\\])
''', re.MULTILINE|re.VERBOSE)
def translate_tokens(buf):
Do note that FileExistsError is raised if cfile ends up pointing at a
non-regular file or symlink. Because the compilation uses a file renaming,
the resulting file would be regular and thus not the same type of file as
it was previously.
"""
Since we can't modify the parser in the builtin parser module we
must do token translation here. Luckily it does not affect line
numbers.
def foo [plain] (...): -> def _q_plain_template__foo(...):
def foo [html] (...): -> def _q_html_template__foo(...):
XXX This parser is too stupid. For example, it doesn't understand
triple quoted strings.
"""
def replacement(match):
template_type = match.group('type')
return '%sdef _q_%s_template_%s(' % (match.group('indent'),
template_type,
match.group('name'))
return _template_re.sub(replacement, buf)
def parse(buf, filename='<string>'):
if isinstance(buf, bytes):
buf = importlib.util.decode_source(buf)
buf = translate_tokens(buf)
# derived from py_compile.compile
if os.path.islink(cfile):
msg = ('{} is a symlink and will be changed into a regular file if '
'import writes a byte-compiled file to it')
raise FileExistsError(msg.format(cfile))
elif os.path.exists(cfile) and not os.path.isfile(cfile):
msg = ('{} is a non-regular file and will be changed into a regular '
'one if import writes a byte-compiled file to it')
raise FileExistsError(msg.format(cfile))
loader = PTLFileLoader('<ptl_compile>', file)
source_bytes = loader.get_data(file)
try:
node = ast.parse(buf, filename)
except SyntaxError as e:
# set the filename attribute
raise SyntaxError(str(e), (filename, e.lineno, e.offset, e.text))
t = TemplateTransformer()
return t.visit(node)
PTL_EXT = ".ptl"
def dump(code, filename, fp):
mtime = os.stat(filename)[stat.ST_MTIME]
fp.write(b'\0\0\0\0')
fp.write(struct.pack('<I', mtime))
marshal.dump(code, fp)
fp.flush()
fp.seek(0)
fp.write(imp.get_magic())
_compile = compile
def compile_template(input, filename, output=None):
"""(input, filename, output=None) -> code
Compile an open file.
If output is not None then the code is written to output.
The code object is returned.
"""
node = parse(input.read(), filename)
code = _compile(node, filename, 'exec')
if output is not None:
dump(code, filename, output)
return code
def compile(inputname, outputname):
"""(inputname, outputname)
Compile a template file. The new template is written to outputname.
"""
input = open(inputname)
output = open(outputname, "wb")
try:
compile_template(input, inputname, output)
except:
# don't leave a corrupt .pyc file around
output.close()
os.unlink(outputname)
raise
def compile_file(filename, force=0, verbose=0):
if filename.endswith(PTL_EXT):
cfile = filename[:-4] + '.pyc'
ftime = os.stat(filename)[stat.ST_MTIME]
try:
ctime = os.stat(cfile)[stat.ST_MTIME]
except os.error:
ctime = 0
if (ctime > ftime) and not force:
code = loader.source_to_code(None, source_bytes, dfile or file,
_optimize=optimize)
except Exception as err:
py_exc = py_compile.PyCompileError(err.__class__, err, dfile or file)
if doraise:
raise py_exc
else:
sys.stderr.write(py_exc.msg + '\n')
return
if verbose:
print('Compiling', filename, '...')
ok = compile(filename, cfile)
try:
dirname = os.path.dirname(cfile)
if dirname:
os.makedirs(dirname)
except FileExistsError:
pass
source_stats = loader.path_stats(file)
bytecode = importlib._bootstrap_external._code_to_bytecode(
code, source_stats['mtime'], source_stats['size'])
mode = importlib._bootstrap_external._calc_mode(file)
importlib._bootstrap_external._write_atomic(cfile, bytecode, mode)
return cfile
def compile_dir(dir, maxlevels=10, force=0):
"""Byte-compile all PTL modules in the given directory tree.
(Adapted from compile_dir in Python module: compileall.py)
# derived from compileall.py
def _walk_dir(dir, ddir=None, maxlevels=10, quiet=0):
if not quiet:
print('Listing {!r}...'.format(dir))
try:
names = os.listdir(dir)
except OSError:
if quiet < 2:
print("Can't list {!r}".format(dir))
names = []
names.sort()
for name in names:
if name == '__pycache__':
continue
fullname = os.path.join(dir, name)
if ddir is not None:
dfile = os.path.join(ddir, name)
else:
dfile = None
if not os.path.isdir(fullname):
yield fullname
elif (maxlevels > 0 and name != os.curdir and name != os.pardir and
os.path.isdir(fullname) and not os.path.islink(fullname)):
yield from _walk_dir(fullname, ddir=dfile,
maxlevels=maxlevels - 1, quiet=quiet)
# derived from compileall.py
def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
quiet=0, legacy=False, optimize=-1, workers=1):
"""Byte-compile all modules in the given directory tree.
Arguments (only dir is required):
dir: the directory to byte-compile
maxlevels: maximum recursion level (default 10)
force: if true, force compilation, even if timestamps are up-to-date
ddir: the directory that will be prepended to the path to the
file as it is compiled into each byte-code file.
force: if True, force compilation, even if timestamps are up-to-date
quiet: full output with False or 0, errors only with 1,
no output with 2
legacy: if True, produce legacy pyc paths instead of PEP 3147 paths
optimize: optimization level or -1 for level of the interpreter
workers: maximum number of parallel workers
"""
print('Listing', dir, '...')
try:
names = os.listdir(dir)
except os.error:
print("Can't list", dir)
names = []
names.sort()
files = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels, ddir=ddir)
success = 1
for name in names:
fullname = os.path.join(dir, name)
if os.path.isfile(fullname):
for file in files:
if not compile_file(file, ddir, force, rx, quiet,
legacy, optimize):
success = 0
return success
# derived from compileall.py
def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
legacy=False, optimize=-1):
"""Byte-compile one file.
Arguments (only fullname is required):
fullname: the file to byte-compile
ddir: if given, the directory name compiled in to the
byte-code file.
force: if True, force compilation, even if timestamps are up-to-date
quiet: full output with False or 0, errors only with 1,
no output with 2
legacy: if True, produce legacy pyc paths instead of PEP 3147 paths
optimize: optimization level or -1 for level of the interpreter
"""
success = 1
name = os.path.basename(fullname)
if ddir is not None:
dfile = os.path.join(ddir, name)
else:
dfile = None
if rx is not None:
mo = rx.search(fullname)
if mo:
return success
if os.path.isfile(fullname):
if legacy:
cfile = fullname[:-len(PTL_EXT)] + '.pyc'
else:
if optimize >= 0:
opt = optimize if optimize >= 1 else ''
cfile = importlib.util.cache_from_source(
fullname, optimization=opt)
else:
cfile = importlib.util.cache_from_source(fullname)
cache_dir = os.path.dirname(cfile)
head, tail = name[:-4], name[-4:]
if tail == PTL_EXT:
if not force:
try:
mtime = int(os.stat(fullname).st_mtime)
expect = struct.pack('<4sl', importlib.util.MAGIC_NUMBER,
mtime)
with open(cfile, 'rb') as chandle:
actual = chandle.read(8)
if expect == actual:
return success
except OSError:
pass
if not quiet:
print('Compiling {!r}...'.format(fullname))
try:
ok = compile_file(fullname, force=force, verbose=1)
except KeyboardInterrupt:
raise KeyboardInterrupt
except:
# XXX compile catches SyntaxErrors
if type(sys.exc_type) == type(''):
exc_type_name = sys.exc_type
else: exc_type_name = sys.exc_type.__name__
print('Sorry:', exc_type_name + ':',)
print(sys.exc_value)
ok = ptl_compile(fullname, cfile, dfile, True,
optimize=optimize)
except py_compile.PyCompileError as err:
success = 0
if quiet >= 2:
return success
elif quiet:
print('*** Error compiling {!r}...'.format(fullname))
else:
print('*** ', end='')
# escape non-printable characters in msg
msg = err.msg.encode(sys.stdout.encoding,
errors='backslashreplace')
msg = msg.decode(sys.stdout.encoding)
print(msg)
except (SyntaxError, UnicodeError, OSError) as e:
success = 0
if quiet >= 2:
return success
elif quiet:
print('*** Error compiling {!r}...'.format(fullname))
else:
print('*** ', end='')
print(e.__class__.__name__ + ':', e)
else:
if ok == 0:
success = 0
elif (maxlevels > 0 and name != os.curdir and name != os.pardir and
os.path.isdir(fullname) and not os.path.islink(fullname)):
if not compile_dir(fullname, maxlevels - 1, force):
success = 0
return success
def compile_package(path, force=0, verbose=0):
# derived from compileall.py
def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
legacy=False, optimize=-1):
"""Byte-compile all module on sys.path.
Arguments (all optional):
skip_curdir: if true, skip current directory (default True)
maxlevels: max recursion level (default 0)
force: as for compile_dir() (default False)
quiet: as for compile_dir() (default 0)
legacy: as for compile_dir() (default False)
optimize: as for compile_dir() (default -1)
"""
success = 1
for dir in sys.path:
if (not dir or dir == os.curdir) and skip_curdir:
if quiet < 2:
print('Skipping current directory')
else:
success = success and compile_dir(dir, maxlevels, None,
force, quiet=quiet,
legacy=legacy, optimize=optimize)
return success
def compile_package(paths, force=0, verbose=0):
"""Compile all PTL files in a package. 'path' should be a list
of directory names containing the files of the package (i.e. __path__).
"""
for package_dir in path:
for dirpath, dirnames, filenames in os.walk(package_dir):
for dirname in dirnames:
compile_file(os.path.join(dirpath, dirname), force=force,
verbose=verbose)
for filename in filenames:
compile_file(os.path.join(dirpath, filename), force=force,
verbose=verbose)
for path in paths:
compile_dir(path, quiet=0 if verbose else 1, force=force, legacy=1)
def main():
args = sys.argv[1:]
if not args:
print("no files to compile")
else:
for filename in args:
path, ext = os.path.splitext(filename)
compile(filename, path + ".pyc")
"""Script main program."""
import argparse
if __name__ == "__main__":
main()
parser = argparse.ArgumentParser(
description='Utilities to support installing Python libraries.')
parser.add_argument('-l', action='store_const', const=0,
default=10, dest='maxlevels',
help="don't recurse into subdirectories")
parser.add_argument('-r', type=int, dest='recursion',
help=('control the maximum recursion level. '
'if `-l` and `-r` options are specified, '
'then `-r` takes precedence.'))
parser.add_argument('-f', action='store_true', dest='force',
help='force rebuild even if timestamps are up to date')
parser.add_argument('-q', action='count', dest='quiet', default=0,
help='output only error messages; -qq will suppress '
'the error messages as well.')
parser.add_argument('-b', action='store_true', dest='legacy',
help='use legacy (pre-PEP3147) compiled file locations')
parser.add_argument('-d', metavar='DESTDIR', dest='ddir', default=None,
help=('directory to prepend to file paths for use in '
'compile-time tracebacks and in runtime '
'tracebacks in cases where the source file is '
'unavailable'))
parser.add_argument('-x', metavar='REGEXP', dest='rx', default=None,
help=('skip files matching the regular expression; '
'the regexp is searched for in the full path '
'of each file considered for compilation'))
parser.add_argument('-i', metavar='FILE', dest='flist',
help=('add all the files and directories listed in '
'FILE to the list considered for compilation; '
'if "-", names are read from stdin'))
parser.add_argument('compile_dest', metavar='FILE|DIR', nargs='*',
help=('zero or more file and directory names '
'to compile; if no arguments given, defaults '
'to the equivalent of -l sys.path'))
parser.add_argument('-j', '--workers', default=1,
type=int, help='Run compileall concurrently')
args = parser.parse_args()
compile_dests = args.compile_dest
if (args.ddir and (len(compile_dests) != 1
or not os.path.isdir(compile_dests[0]))):
parser.exit('-d destdir requires exactly one directory argument')
if args.rx:
import re
args.rx = re.compile(args.rx)
if args.recursion is not None:
maxlevels = args.recursion
else:
maxlevels = args.maxlevels
# if flist is provided then load it
if args.flist:
try:
with (sys.stdin if args.flist=='-' else open(args.flist)) as f:
for line in f:
compile_dests.append(line.strip())
except OSError:
if args.quiet < 2:
print("Error reading file list {}".format(args.flist))
return False
if args.workers is not None:
args.workers = args.workers or None
success = True
try:
if compile_dests:
for dest in compile_dests:
if os.path.isfile(dest):
if not compile_file(dest, args.ddir, args.force, args.rx,
args.quiet, args.legacy):
success = False
else:
if not compile_dir(dest, maxlevels, args.ddir,
args.force, args.rx, args.quiet,
args.legacy, workers=args.workers):
success = False
return success
else:
return compile_path(legacy=args.legacy, force=args.force,
quiet=args.quiet)
except KeyboardInterrupt:
if args.quiet < 2:
print("\n[interrupted]")
return False
return True
if __name__ == '__main__':
exit_status = int(not main())
sys.exit(exit_status)

View File

@ -4,14 +4,16 @@ as if they were Python modules.
import sys
from importlib.machinery import FileFinder, PathFinder, SourceFileLoader
from .ptl_parse import parse
from quixote.ptl.ptl_compile import parse, PTL_EXT
PTL_EXT = ".ptl"
class PTLFileLoader(SourceFileLoader):
@staticmethod
def source_to_code(data, path='<string>'):
def source_to_code(self, data, path, *, _optimize=-1):
node = parse(data, path)
return compile(node, path, 'exec')
return compile(node, path, 'exec', dont_inherit=True,
optimize=_optimize)
class PTLPathFinder(PathFinder):

156
quixote/ptl/ptl_parse.py Normal file
View File

@ -0,0 +1,156 @@
"""Parse a PTL template, return AST node.
First template function names are mangled, noting the template type.
Next, the file is parsed into a parse tree. This tree is converted into
a modified AST. It is during this state that the semantics are modified
by adding extra nodes to the tree. The modified AST is returned, ready
to pass to the compile() built-in function.
"""
import ast
import re
import importlib.util
HTML_TEMPLATE_PREFIX = "_q_html_template_"
PLAIN_TEMPLATE_PREFIX = "_q_plain_template_"
class TemplateTransformer(ast.NodeTransformer):
def __init__(self, *args, **kwargs):
ast.NodeTransformer.__init__(self, *args, **kwargs)
# __template_type is a stack whose values are
# "html", "plain", or None
self.__template_type = [None]
def _get_template_type(self):
"""Return the type of the function being compiled (
"html", "plain", or None)
"""
if self.__template_type:
return self.__template_type[-1]
else:
return None
def visit_Module(self, node):
html_imp = ast.ImportFrom(module='quixote.html',
names=[ast.alias(name='TemplateIO',
asname='_q_TemplateIO'),
ast.alias(name='htmltext',
asname='_q_htmltext')],
level=0)
ast.fix_missing_locations(html_imp)
vars_imp = ast.ImportFrom(module='builtins',
names=[ast.alias(name='vars',
asname='_q_vars')], level=0)
ast.fix_missing_locations(vars_imp)
ptl_imports = [vars_imp, html_imp]
# skip __future__ statements
idx = 0
for i, stmt in enumerate(node.body):
if isinstance(stmt, ast.ImportFrom) and stmt.module == '__future__':
idx = i + 1
node.body[idx:idx] = ptl_imports
return self.generic_visit(node)
def visit_FunctionDef(self, node):
name = node.name
if not re.match('_q_(html|plain)_template_', name):
# just a normal function
self.__template_type.append(None)
node = self.generic_visit(node)
else:
if name.startswith(PLAIN_TEMPLATE_PREFIX):
node.name = name[len(PLAIN_TEMPLATE_PREFIX):]
template_type = "plain"
elif name.startswith(HTML_TEMPLATE_PREFIX):
node.name = name[len(HTML_TEMPLATE_PREFIX):]
template_type = "html"
else:
raise RuntimeError('unknown prefix on %s' % name)
self.__template_type.append(template_type)
node = self.generic_visit(node)
# _q_output = _q_TemplateIO()
klass = ast.Name(id='_q_TemplateIO', ctx=ast.Load())
arg = ast.NameConstant(template_type == 'html')
instance = ast.Call(func=klass, args=[arg], keywords=[],
starargs=None, kwargs=None)
assign_name = ast.Name(id='_q_output', ctx=ast.Store())
assign = ast.Assign(targets=[assign_name], value=instance)
ast.copy_location(assign, node)
ast.fix_missing_locations(assign)
node.body.insert(0, assign)
# return _q_output.getvalue()
n = ast.Name(id='_q_output', ctx=ast.Load())
n = ast.Attribute(value=n, attr='getvalue', ctx=ast.Load())
n = ast.Call(func=n, args=[], keywords=[], starargs=None,
kwargs=None)
ret = ast.Return(value=n)
ast.copy_location(ret, node.body[-1])
ast.fix_missing_locations(ret)
node.body.append(ret)
self.__template_type.pop()
return node
def visit_Expr(self, node):
if self._get_template_type() is not None:
node = self.generic_visit(node)
# Instead of discarding objects on the stack, call
# "_q_output += obj".
lval = ast.Name(id='_q_output', ctx=ast.Store())
ast.copy_location(lval, node)
aug = ast.AugAssign(target=lval, op=ast.Add(), value=node.value)
return ast.copy_location(aug, node)
else:
return node
def visit_Str(self, node):
if "html" == self._get_template_type():
n = ast.Name(id='_q_htmltext', ctx=ast.Load())
ast.copy_location(n, node)
n = ast.Call(func=n, args=[node], keywords=[], starargs=None,
kwargs=None)
return ast.copy_location(n, node)
else:
return node
_template_re = re.compile(r'''
^(?P<indent>[ \t]*) def (?:[ \t]+)
(?P<name>[a-zA-Z_][a-zA-Z_0-9]*)
(?:[ \t]*) \[(?P<type>plain|html)\] (?:[ \t]*)
(?:[ \t]*[\(\\])
''', re.MULTILINE|re.VERBOSE)
def translate_tokens(buf):
"""
Since we can't modify the parser in the builtin parser module we
must do token translation here. Luckily it does not affect line
numbers.
def foo [plain] (...): -> def _q_plain_template__foo(...):
def foo [html] (...): -> def _q_html_template__foo(...):
XXX This parser is too stupid. For example, it doesn't understand
triple quoted strings.
"""
def replacement(match):
template_type = match.group('type')
return '%sdef _q_%s_template_%s(' % (match.group('indent'),
template_type,
match.group('name'))
return _template_re.sub(replacement, buf)
def parse(buf, filename='<string>'):
if isinstance(buf, bytes):
buf = importlib.util.decode_source(buf)
buf = translate_tokens(buf)
try:
node = ast.parse(buf, filename)
except SyntaxError as e:
# set the filename attribute
raise SyntaxError(str(e), (filename, e.lineno, e.offset, e.text))
t = TemplateTransformer()
return t.visit(node)