From 12f97b685de1cf13a6b6ab0dd34bcd771b2b9c6f Mon Sep 17 00:00:00 2001 From: Alexis de Lattre Date: Thu, 9 Jan 2020 23:41:21 +0100 Subject: [PATCH] Release 1.6: generate XMP via simple format() instead of using lxml --- README.rst | 4 + facturx/_version.py | 2 +- facturx/facturx.py | 197 ++++++++++++++++++++++---------------------- 3 files changed, 103 insertions(+), 100 deletions(-) diff --git a/README.rst b/README.rst index 309fe29..cf9e4f0 100644 --- a/README.rst +++ b/README.rst @@ -65,6 +65,10 @@ Contributors Changelog ========= +* Version 1.6 dated 2020-01-09 + + * Generate XMP (XML-based PDF metadata) via string replacement instead of using XML lib + * Version 1.5 dated 2019-11-13 * Fix bug in generate_facturx_from_file() when using argument additional_attachments diff --git a/facturx/_version.py b/facturx/_version.py index fcb6b5d..6d5e09d 100644 --- a/facturx/_version.py +++ b/facturx/_version.py @@ -1 +1 @@ -__version__ = '1.5' +__version__ = '1.6' diff --git a/facturx/facturx.py b/facturx/facturx.py index b603ed7..13ef6db 100644 --- a/facturx/facturx.py +++ b/facturx/facturx.py @@ -330,105 +330,103 @@ def _prepare_pdf_metadata_txt(pdf_metadata): def _prepare_pdf_metadata_xml(facturx_level, pdf_metadata): - nsmap_x = {'x': 'adobe:ns:meta/'} - nsmap_rdf = {'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'} - nsmap_dc = {'dc': 'http://purl.org/dc/elements/1.1/'} - nsmap_pdf = {'pdf': 'http://ns.adobe.com/pdf/1.3/'} - nsmap_xmp = {'xmp': 'http://ns.adobe.com/xap/1.0/'} - nsmap_pdfaid = {'pdfaid': 'http://www.aiim.org/pdfa/ns/id/'} - nsmap_fx = { - 'fx': 'urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#'} - ns_x = '{%s}' % nsmap_x['x'] - ns_dc = '{%s}' % nsmap_dc['dc'] - ns_rdf = '{%s}' % nsmap_rdf['rdf'] - ns_pdf = '{%s}' % nsmap_pdf['pdf'] - ns_xmp = '{%s}' % nsmap_xmp['xmp'] - ns_pdfaid = '{%s}' % nsmap_pdfaid['pdfaid'] - ns_fx = '{%s}' % nsmap_fx['fx'] - ns_xml = '{http://www.w3.org/XML/1998/namespace}' - - root = etree.Element(ns_x + 'xmpmeta', 
nsmap=nsmap_x) - rdf = etree.SubElement( - root, ns_rdf + 'RDF', nsmap=nsmap_rdf) - desc_pdfaid = etree.SubElement( - rdf, ns_rdf + 'Description', nsmap=nsmap_pdfaid) - desc_pdfaid.set(ns_rdf + 'about', '') - etree.SubElement( - desc_pdfaid, ns_pdfaid + 'part').text = '3' - etree.SubElement( - desc_pdfaid, ns_pdfaid + 'conformance').text = 'B' - desc_dc = etree.SubElement( - rdf, ns_rdf + 'Description', nsmap=nsmap_dc) - desc_dc.set(ns_rdf + 'about', '') - dc_title = etree.SubElement(desc_dc, ns_dc + 'title') - dc_title_alt = etree.SubElement(dc_title, ns_rdf + 'Alt') - dc_title_alt_li = etree.SubElement( - dc_title_alt, ns_rdf + 'li') - dc_title_alt_li.text = pdf_metadata.get('title', '') - dc_title_alt_li.set(ns_xml + 'lang', 'x-default') - dc_creator = etree.SubElement(desc_dc, ns_dc + 'creator') - dc_creator_seq = etree.SubElement(dc_creator, ns_rdf + 'Seq') - etree.SubElement( - dc_creator_seq, ns_rdf + 'li').text = pdf_metadata.get('author', '') - dc_desc = etree.SubElement(desc_dc, ns_dc + 'description') - dc_desc_alt = etree.SubElement(dc_desc, ns_rdf + 'Alt') - dc_desc_alt_li = etree.SubElement( - dc_desc_alt, ns_rdf + 'li') - dc_desc_alt_li.text = pdf_metadata.get('subject', '') - dc_desc_alt_li.set(ns_xml + 'lang', 'x-default') - desc_adobe = etree.SubElement( - rdf, ns_rdf + 'Description', nsmap=nsmap_pdf) - desc_adobe.set(ns_rdf + 'about', '') - producer = etree.SubElement( - desc_adobe, ns_pdf + 'Producer') - producer.text = 'PyPDF4' - desc_xmp = etree.SubElement( - rdf, ns_rdf + 'Description', nsmap=nsmap_xmp) - desc_xmp.set(ns_rdf + 'about', '') - creator = etree.SubElement( - desc_xmp, ns_xmp + 'CreatorTool') - creator.text = 'factur-x python lib v%s by Alexis de Lattre' % __version__ - timestamp = _get_metadata_timestamp() - etree.SubElement(desc_xmp, ns_xmp + 'CreateDate').text = timestamp - etree.SubElement(desc_xmp, ns_xmp + 'ModifyDate').text = timestamp - - xmp_file = resource_filename( - __name__, 'xmp/Factur-X_extension_schema.xmp') - # 
Reason for defining a parser below: - # http://lxml.de/FAQ.html#why-doesn-t-the-pretty-print-option-reformat-my-xml-output - parser = etree.XMLParser(remove_blank_text=True) - facturx_ext_schema_root = etree.parse(open(xmp_file), parser) - # The Factur-X extension schema must be embedded into each PDF document - facturx_ext_schema_desc_xpath = facturx_ext_schema_root.xpath( - '//rdf:Description', namespaces=nsmap_rdf) - rdf.append(facturx_ext_schema_desc_xpath[1]) - # Now is the Factur-X description tag - facturx_desc = etree.SubElement( - rdf, ns_rdf + 'Description', nsmap=nsmap_fx) - facturx_desc.set(ns_rdf + 'about', '') - fx_doc_type = etree.SubElement( - facturx_desc, ns_fx + 'DocumentType', nsmap=nsmap_fx) - fx_doc_type.text = 'INVOICE' - fx_doc_filename = etree.SubElement( - facturx_desc, ns_fx + 'DocumentFileName', nsmap=nsmap_fx) - fx_doc_filename.text = FACTURX_FILENAME - fx_doc_version = etree.SubElement( - facturx_desc, ns_fx + 'Version', nsmap=nsmap_fx) - fx_doc_version.text = '1.0' - fx_conformance_level = etree.SubElement( - facturx_desc, ns_fx + 'ConformanceLevel', nsmap=nsmap_fx) - fx_conformance_level.text = FACTURX_LEVEL2xmp[facturx_level] - - # TODO: should be UTF-16be ?? 
- xml_str = etree.tostring( - root, pretty_print=True, encoding="UTF-8", xml_declaration=False) - head = ''.encode( - 'utf-8') - tail = ''.encode('utf-8') - xml_final_str = head + xml_str + tail + xml_str = """ + + + + + 3 + B + + + + + {title} + + + + + {author} + + + + + {subject} + + + + + {producer} + + + {creator_tool} + {timestamp} + {timestamp} + + + + + + Factur-X PDFA Extension Schema + urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0# + fx + + + + DocumentFileName + Text + external + name of the embedded XML invoice file + + + DocumentType + Text + external + INVOICE + + + Version + Text + external + The actual version of the Factur-X XML schema + + + ConformanceLevel + Text + external + The conformance level of the embedded Factur-X data + + + + + + + + + {facturx_documenttype} + {facturx_filename} + {facturx_version} + {facturx_level} + + + + +""" + xml_str = xml_str.format( + title=pdf_metadata.get('title', ''), + author=pdf_metadata.get('author', ''), + subject=pdf_metadata.get('subject', ''), + producer='PyPDF4', + creator_tool='factur-x python lib v%s by Alexis de Lattre' % __version__, + timestamp=_get_metadata_timestamp(), + facturx_documenttype='INVOICE', + facturx_filename=FACTURX_FILENAME, + facturx_version='1.0', + facturx_level=FACTURX_LEVEL2xmp[facturx_level]) + xml_byte = xml_str.encode('utf-8') logger.debug('metadata XML:') - logger.debug(xml_final_str) - return xml_final_str + logger.debug(xml_byte) + return xml_byte # def createByteObject(string): @@ -813,7 +811,8 @@ def generate_facturx_from_file( else: file_type = 'file' xml_root = None - if isinstance(facturx_xml, str): + # in Python3, xml_string is a byte + if isinstance(facturx_xml, (str, bytes)): xml_string = facturx_xml elif isinstance(facturx_xml, unicode): xml_string = facturx_xml.encode('utf8')