expression/src/core/xmlschemas.py

# -*- coding: UTF-8 -*-


# Expression
# By: Frederic Peters <fpeters@entrouvert.com>
#     Emmanuel Raviart <eraviart@entrouvert.com>
#
# Copyright (C) 2004 Entr'ouvert, Frederic Peters & Emmanuel Raviart
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


"""XML Schema."""


import datetime
import time

import dataholders
import elements
import filesystems
import logs
import namespaces
import stations


# Cache of the schema holders already created, keyed by absolute path
# (read and filled by getSchemaHolder()).
_schemaHolders = {}
# Maps a type name to the class to use for Type elements bearing that name
# (filled by registerType(), read by Schema.newTypeContext()).
objectTypeClasses = {}


class Schema(elements.Element):
    """ An XML Schema definition.
    Defines the structure of a document, and specifically field types.
    """
    # libxml2 schema object built by __init__() and used by the validate*() methods.
    _schemaContext = None

    def __init__(self, *a, **b):
        """ Initializes a libxml2 schema context, ensuring the schema itself is valid.
        """
        super(Schema, self).__init__(*a, **b)
        parserContext = self.node.doc.schemaNewDocParserCtxt()
        self._schemaContext = parserContext.schemaParse()

    def buildXformsModelContext(self, context):
        """ Registers this schema in the "schemas" context of "context".
        The SchemasContext and its list of schemas are created on first use.
        """
        if context.schemas is None:
            context.schemas = SchemasContext(
                context.specimen, previous = context,
                uriPathFragment = "schemas")
        schemasContext = context.schemas
        if schemasContext.schemas is None:
            schemasContext.schemas = []
        schemasContext.schemas.append(self)

    def getAttributeInNodeType(self, value, node, name):
        """ Returns a TypeContext for attribute "name" whose definition is a child of "node".
        Argument "value" is a ModelContext to be used as the context specimen.
        Returns None when "node" has no such attribute definition.
        """
        attributeNode = self.getAttributeNodeInNode(node, name)
        if attributeNode is not None:
            return self.getNodeType(value, attributeNode)
        return None

    def getAttributeNodeInNode(self, node, name):
        """ Returns the matching "xsd:attribute" element node found as a child of "node".
        Returns None when there is no match.
        """
        # Apostrophes are replaced so that "name" can not terminate the XPath string literal.
        xpath = "xsd:attribute[@name = '%s']" % name.replace("'", "&apos;")
        for attributeNode in self.evaluateXpath(xpath, node):
            # Only the first match is of interest.
            return attributeNode
        return None

    def getNodeType(self, value, node):
        """ Returns a TypeContext for the element or attribute defined at "node".
        Argument "value" is a ModelContext to be used as the context specimen.

        Handles both type names (type="...") and embedded type definition.
        Returns None when "node" has neither a type name nor a child element.
        """
        typeName = node.prop("type")
        if typeName:
            return self.getGlobalType(value, typeName)
        for typeNode in self.evaluateXpath("*", node):
            # The first child element is taken as the embedded type definition.
            return self.newTypeContext(value, typeNode)
        return None

    def getElementInNodeType(self, value, node, name):
        """ Returns a TypeContext for element "name" whose definition is a child of node "node".
        Argument "value" is a ModelContext to be used as the context specimen.
        Returns None when "node" has no such element definition.
        """
        elementNode = self.getElementNodeInNode(node, name)
        if elementNode is not None:
            return self.getNodeType(value, elementNode)
        return None

    def getElementNodeInNode(self, node, name):
        """ Returns the "name"-matching "xsd:element" element node amongst children of "node".
        Returns None when there is no match.
        """
        # Apostrophes are replaced so that "name" can not terminate the XPath string literal.
        xpath = "xsd:element[@name = '%s']" % name.replace("'", "&apos;")
        for elementNode in self.evaluateXpath(xpath, node):
            # Only the first match is of interest.
            return elementNode
        return None

    def getGlobalElementNode(self, name, scannedSchemas = None):
        """ Seeks and returns the definition node for element "name" in this schema first, then in included schemas.
        Argument scannedSchemas is used internally to avoid infinite recursive loop.
        Returns None when the element is defined nowhere, or when this schema was already scanned.
        """
        if scannedSchemas is None:
            scannedSchemas = []
        elif self in scannedSchemas:
            return None
        scannedSchemas.append(self)
        elementNode = self.getElementNodeInNode(self.node, name)
        if elementNode is not None:
            return elementNode
        # Try every included or imported schema, not only the first one.
        for schemaLocation in self.getIncludeLocations():
            schema = self.getSchemaAtLocation(schemaLocation)
            if schema is None:
                # Missing schema file: already logged by getSchemaAtLocation().
                continue
            elementNode = schema.getGlobalElementNode(name, scannedSchemas = scannedSchemas)
            if elementNode is not None:
                return elementNode
        return None

    def getGlobalElementType(self, value, name, scannedSchemas = None):
        """ Returns a TypeContext for the element "name" defined in this schema.
        Argument "value" is a ModelContext to be used as the context specimen.
        Argument scannedSchemas is used internally to avoid infinite recursive loop.
        Returns None when the element definition is not found.
        """
        elementNode = self.getGlobalElementNode(name, scannedSchemas = scannedSchemas)
        if elementNode is not None:
            return self.getNodeType(value, elementNode)
        return None

    def getGlobalType(self, value, name, scannedSchemas = None):
        """ Returns a TypeContext for a type called "name" within all those we know.
        Argument "value" is a ModelContext to be used as the context specimen.
        Argument scannedSchemas is used internally to avoid infinite recursive loop.
        Returns None when the type is not found, or when this schema was already scanned.
        """
        # remember that I was here and avoid infinite recursive loop
        if scannedSchemas is None:
            scannedSchemas = []
        elif self in scannedSchemas:
            return None
        scannedSchemas.append(self)
        # see whether we know this type as a standard xsd type
        # FIXME.
        if name.startswith("xsd:") or name == EmailAddress.name:
            typeClasses = {
                "xsd:boolean": Boolean,
                "xsd:date": Date,
                "xsd:dateTime": DateTime,
                "xsd:integer": Integer,
                "xsd:string": String,
                "xsd:anyURI": String,
                EmailAddress.name: EmailAddress,
                }
            if name in typeClasses:
                typeClass = typeClasses[name]
            else:
                # Unsupported standard types are handled as plain strings.
                logs.debug("""Using class "String" for type "%s".""" % name)
                typeClass = String
            return TypeContext(typeClass(), value, previous = value)

        # seek the definition of type "name" within me
        globalType = self.getTypeInNode(value, self.node, name)
        if globalType is not None:
            return globalType

        # recurse in included schemas
        for schemaLocation in self.getIncludeLocations():
            schema = self.getSchemaAtLocation(schemaLocation)
            if schema is None:
                # Missing schema file: already logged by getSchemaAtLocation().
                continue
            globalType = schema.getGlobalType(value, name, scannedSchemas = scannedSchemas)
            if globalType is not None:
                return globalType
        return None

    def getIncludeLocations(self):
        """ Returns the list of included and imported schema locations.
        """
        return [
            node.content
            for node in self.evaluateXpath("(xsd:include|xsd:import)/@schemaLocation")
        ]

    def getSchemaAtLocation(self, schemaLocation):
        """ Returns the schema object found at location schemaLocation.
        Returns None (after logging a debug message) when the schema can not be found.
        """
        # do not use walkToLocation, convert to absolute path and get it
        schemaAbsolutePath = self.convertRelativeLocationToAbsolutePath(schemaLocation)
        if schemaAbsolutePath is None:
            logs.debug('Missing schema file "%s".' % schemaLocation)
            return None
        schemaHolder = getSchemaHolder(schemaAbsolutePath)
        if schemaHolder is None:
            logs.debug('Missing schema file "%s".' % schemaLocation)
            return None
        return schemaHolder.getRootElement()

    def getTargetNamespace(self):
        """ Returns the target namespace uri.
        """
        return self.node.prop("targetNamespace")

    def getTypeInNode(self, value, node, name):
        """ Returns a TypeContext for element or attribute "name".
        Argument "value" is a ModelContext to be used as the context specimen.
        Argument "node" is the context node (where to look for the element or attribute declaration).
        The name is tried as given first, then without the target namespace prefix.
        Returns None when no definition is found.
        """
        # Apostrophes are replaced so that "name" can not terminate the XPath string literal.
        name = name.replace("'", "&apos;")
        for typeNode in self.evaluateXpath("*[@name = '%s']" % name, node):
            return self.newTypeContext(value, typeNode)
        targetNamespacePrefix = namespaces.getName(self.getTargetNamespace())
        if name[:len(targetNamespacePrefix) + 1] == "%s:" % targetNamespacePrefix:
            # Retry with the local name; without a prefix to strip the query
            # would be the very same as the one above.
            name = name[len(targetNamespacePrefix) + 1:]
            for typeNode in self.evaluateXpath("*[@name = '%s']" % name, node):
                return self.newTypeContext(value, typeNode)
        return None

    def newContext(self, specimen, *attributes, **keywords):
        """ Returns a SchemaContext.
        """
        return SchemaContext(self, specimen, *attributes, **keywords)

    def newTypeContext(self, value, node):
        """ Creates and returns a TypeContext.
        Its prototype is a Type element build around argument "node".
        Its specimen is the ModelContext "value" argument.
        """
        # FIXME: Handle other namespaces.
        typeElement = Type(node, previous = self, owner = self)
        # Types registered with registerType() get their own class; others remain plain Type.
        typeClass = objectTypeClasses.get(typeElement.name, Type)
        if typeClass != Type:
            typeElement.__class__ = typeClass
        return TypeContext(typeElement, value, previous = value)

    def validateDocument(self, doc, options = 0):
        """ Validates a libxml2 document. Returns True on pass.
        """
        validationContext = self._schemaContext.schemaNewValidCtxt()
        validationContext.schemaSetValidOptions(options)
        return doc.schemaValidateDoc(validationContext) == 0

    def validateElement(self, node, options = 0):
        """ Validates a libxml2 element node. Returns True on pass.
        """
        validationContext = self._schemaContext.schemaNewValidCtxt()
        validationContext.schemaSetValidOptions(options)
        return node.schemaValidateOneElement(validationContext) == 0


class SchemaContext(stations.AbstractContext):
    """ The kind of context built by Schema.newContext().
    Element type lookups are forwarded to its prototype.
    NOTE(review): the prototype is presumably the Schema given to the
    constructor -- confirm against stations.AbstractContext.
    """
    def getGlobalElementType(self, value, name, scannedSchemas = None):
        """ Asks the prototype for the type of the global element "name".
        Arguments and result are those of the prototype's getGlobalElementType().
        """
        prototype = self.prototype
        return prototype.getGlobalElementType(
            value, name, scannedSchemas = scannedSchemas)


class SchemaHolder(dataholders.XmlHolder):
    """ A .xsd file.
    This is the holder class registered for "xsd:schema" elements by the
    elements.registerElement() call at the end of this module.
    """
    # File name extension of schema files.
    defaultFileNameExtension = ".xsd"


class SchemasContext(stations.AbstractContext):
    """ A context gathering the schemas usable to find the type of a value.
    It is its own prototype.
    """
    # List of Schema elements, or None as long as no schema has been added
    # (see Schema.buildXformsModelContext()).
    schemas = None

    def __init__(self, specimen, previous = None, uriPathFragment = None):
        """ Initializes the context without any explicit prototype.
        """
        super(SchemasContext, self).__init__(
            None, specimen, previous = previous, uriPathFragment = uriPathFragment)

    def getValueType(self, value):
        """ Returns the type of the global element named like the node of "value".
        Schemas are tried in order; the first type found is returned.
        Returns None when there is no schema or when no schema defines the element.
        """
        if self.schemas is None:
            return None
        elementName = value.node.name
        # Shared between the lookups, so that a schema reachable from several
        # others is scanned only once. Each schema adds itself to the list in
        # Schema.getGlobalElementNode(); this context must not be added to it.
        scannedSchemas = []
        for schema in self.schemas:
            elementType = schema.getGlobalElementType(
                value, elementName, scannedSchemas = scannedSchemas)
            if elementType is not None:
                return elementType
        return None

    def getPrototype(self):
        """ Returns this context itself, which acts as its own prototype.
        """
        return self

    prototype = property(getPrototype)


class AbstractType:
    """ Base class of the type classes through which field values are handled.
    Integer, String, etc. derive from it, as do custom type classes
    (Identity, Session etc.). The conversions below are the defaults: they
    hand the content over unchanged, except for the plain text one which
    decodes it from UTF-8.
    """
    def convertSubmissionValueToContent(self, submissionValue):
        """ Converts a value submitted by a form to the XML storage format.
        """
        content = submissionValue
        return content

    def convertValueNodeToHtmlNodes(self, valueNode):
        """ Converts an XML node to a list of HTML nodes.
        """
        htmlNodes = []
        htmlNodes.append(valueNode.content)
        return htmlNodes

    def convertValueNodeToHtmlAttributeValue(self, valueNode):
        """ Converts an XML node to an HTML attribute value.
        """
        attributeValue = valueNode.content
        return attributeValue

    def convertValueNodeToPlainText(self, valueNode):
        """ Converts an XML node to its plain text representation.
        """
        content = valueNode.content
        return content.decode("UTF-8")


class Type(AbstractType, elements.Element):
    """ A non-standard type.
    It wraps a type definition node of a schema and knows how to find the
    types of the attributes and child elements declared in that definition.
    """
    def getAttributeType(self, value, attributeName):
        """ Returns a TypeContext for attribute "attributeName".
        Argument "value" is a ModelContext to be used as the context specimen.
        Returns None when the attribute is not found.
        """
        # FIXME: Works only for:
        #   - xsd:complexType/xsd:attribute
        #   - xsd:complexType/xsd:complexContent/xsd:extension/xsd:attribute
        schema = self.getSchema()
        attributeType = schema.getAttributeInNodeType(value, self.node, attributeName)
        if attributeType is not None:
            return attributeType
        complexContentNodes = self.evaluateXpath("xsd:complexContent")
        if not complexContentNodes:
            return None
        extensionNodes = self.evaluateXpath("xsd:extension", complexContentNodes[0])
        if not extensionNodes:
            return None
        extensionNode = extensionNodes[0]
        # First look for the attribute type in base type.
        baseTypeNameNodes = self.evaluateXpath("@base", extensionNode)
        if not baseTypeNameNodes:
            return None
        baseTypeName = baseTypeNameNodes[0].content
        baseType = schema.getGlobalType(value, baseTypeName)
        if baseType is None:
            return None
        attributeType = baseType.prototype.getAttributeType(
            value, attributeName)
        if attributeType is not None:
            return attributeType
        # Then look for the attribute type in extension.
        return schema.getAttributeInNodeType(
            value, extensionNode, attributeName)

    def getChildType(self, value, childName):
        """ Returns a TypeContext for element "childName".
        Argument "value" is a ModelContext to be used as the context specimen.
        Argument "childName" may be a "|"-separated list of names; the type of
        the first name with a known type is then returned.
        Returns None when the element is not found.
        """
        # FIXME: Works only for:
        #   - xsd:complexType/xsd:sequence
        #   - xsd:complexType/xsd:complexContent/xsd:extension/xsd:sequence
        #   - xsd:complexType/xsd:complexContent/xsd:extension/xsd:sequence/xsd:choice
        #   - xsd:complexType/xsd:complexContent/xsd:extension/xsd:sequence/xsd:group/@ref -> xsd:group[@ref]/xsd:sequence/xsd:element
        if "|" in childName:
            for alternativeName in childName.split("|"):
                childType = self.getChildType(value, alternativeName)
                if childType is not None:
                    return childType
            # No alternative is known: looking for an element named after the
            # whole "a|b" expression can not succeed either.
            return None
        # remove the namespace prefix
        if ":" in childName:
            childName = childName[childName.index(':') + 1:]
        schema = self.schema
        sequenceNodes = self.evaluateXpath("xsd:sequence")
        if sequenceNodes:
            return schema.getElementInNodeType(value, sequenceNodes[0], childName)
        complexContentNodes = self.evaluateXpath("xsd:complexContent")
        if not complexContentNodes:
            return None
        extensionNodes = self.evaluateXpath("xsd:extension", complexContentNodes[0])
        if not extensionNodes:
            return None
        extensionNode = extensionNodes[0]
        # First look for the element type in base type.
        baseTypeName = extensionNode.prop("base")
        if baseTypeName is None:
            return None
        baseType = schema.getGlobalType(value, baseTypeName)
        if baseType is None:
            return None
        childType = baseType.prototype.getChildType(value, childName)
        if childType is not None:
            return childType
        # Then look for the element type in extension.
        for sequenceNode in self.evaluateXpath("xsd:sequence", extensionNode):
            elementType = schema.getElementInNodeType(value, sequenceNode, childName)
            if elementType is not None:
                return elementType
            for choiceNode in self.evaluateXpath("xsd:choice", sequenceNode):
                elementType = schema.getElementInNodeType(value, choiceNode, childName)
                if elementType is not None:
                    return elementType
            for groupNode in self.evaluateXpath("xsd:group", sequenceNode):
                ref = groupNode.prop("ref")
                if not ref:
                    continue
                # "ref" is a qualified name whereas group definitions are
                # named with a local name: drop the prefix, as done for
                # "childName" above, otherwise a prefixed reference never
                # matches any definition.
                if ":" in ref:
                    ref = ref[ref.index(":") + 1:]
                groupSequenceNodes = schema.evaluateXpath(
                    "xsd:group[@name=\"%s\"]/xsd:sequence" % ref)
                if groupSequenceNodes:
                    elementType = schema.getElementInNodeType(
                        value, groupSequenceNodes[0], childName)
                    if elementType is not None:
                        return elementType
        return None

    def getName(self):
        """ Returns the name of this Type.
        """
        return self.node.prop("name")

    def getSchema(self):
        """ Returns the schema where this Type is defined.
        """
        return self.getParent()

    def getTypeAtXpath(self, value, xpath):
        """ Returns a TypeContext for the element or attribute at "xpath" in the specimen of ModelContext "value".
        Only the first step of "xpath" is resolved here; the remaining steps
        are resolved by the type found for that first step.
        A string type is used (and a message logged) when the type of a step is unknown.
        NOTE(review): for an empty or "." xpath, this Type itself is returned,
        not a TypeContext.
        """
        if not xpath or xpath == ".":
            return self
        xpathSteps = xpath.split("/", 1)
        name = xpathSteps[0]
        assert name
        if "[" in name:
            if name[0] == "(":
                # case of a "(ns:a|ns:b|ns:c)[3]" expression
                # try finding which of ns:a, ns:b and ns:c is the right one to look for
                for node in value.specimen.specimen.evaluateXpath(name):
                    name = node.name
                    # FIXME: prefix name with namespace name
                    break
            else:
                # drop the predicate, keep the name
                name = name[:name.index("[")]
        elif "|" in name:
            # case of a "ns:a|ns:b|ns:c" expression
            # try finding which of ns:a, ns:b and ns:c is the right one to look for
            for node in value.specimen.specimen.evaluateXpath(name):
                name = node.name
                break
        if name[0] == "@":
            # Attribute
            subType = self.getAttributeType(value, name[1:])
        else:
            # Child
            subType = self.getChildType(value, name)
        if subType is None:
            logs.info(
                """Unknown type for %s in "%s". Using xsd:string instead."""
                    % (name, self.name))
            return TypeContext(String(), value, previous = value)
        if len(xpathSteps) > 1:
            remainingXpath = xpathSteps[1]
            if remainingXpath:
                return subType.getTypeAtXpath(value, remainingXpath)
        return subType

    name = property(getName)
    schema = property(getSchema)


class TypeContext(stations.AbstractContext):
    """ Tools to manipulate a value depending on its type.
    Every method below hands the call over to the same-named method of the
    prototype, so that a TypeContext can be used in place of its Type.
    """
    def convertSubmissionValueToContent(self, submissionValue):
        """ Form submitted value -> XML storage format, as done by the prototype.
        """
        prototype = self.prototype
        return prototype.convertSubmissionValueToContent(submissionValue)

    def convertValueNodeToHtmlNodes(self, valueNode):
        """ XML node -> list of HTML nodes, as done by the prototype.
        """
        prototype = self.prototype
        return prototype.convertValueNodeToHtmlNodes(valueNode)

    def convertValueNodeToHtmlAttributeValue(self, valueNode):
        """ XML node -> HTML attribute value, as done by the prototype.
        """
        prototype = self.prototype
        return prototype.convertValueNodeToHtmlAttributeValue(valueNode)

    def convertValueNodeToPlainText(self, valueNode):
        """ XML node -> plain text representation, as done by the prototype.
        """
        prototype = self.prototype
        return prototype.convertValueNodeToPlainText(valueNode)

    def getTypeAtXpath(self, value, xpath):
        """ Type of the element or attribute at "xpath", as found by the prototype.
        """
        prototype = self.prototype
        return prototype.getTypeAtXpath(value, xpath)


# Standard Types


class AbstractStandardType(AbstractType):
    """ Base class for all known XML Schema standard types (string, integer ...)
    """
    # Name of the type, prefix included (for example "xsd:string"). To override.
    name = None # To override.


class Boolean(AbstractStandardType):
    """ The xsd:boolean type.
    Accepted literals are "0", "1", "false" and "true"; values are always
    displayed as "false" or "true".
    """
    name = "xsd:boolean"

    def _isTrue(self, content):
        """ Tells whether "content" is a true literal.
        Raises ValueError when "content" is not a boolean literal.
        """
        if content in ("1", "true"):
            return True
        if content in ("0", "false"):
            return False
        raise ValueError("Invalid literal for boolean: %s" % content)

    def convertSubmissionValueToContent(self, submissionValue):
        """ Form submitted value -> XML storage format.
        Raises ValueError when the submitted value is not a boolean literal.
        """
        # unchecked checkboxes return None
        if submissionValue is None:
            return "0"
        if submissionValue in ("0", "1", "false", "true"):
            return submissionValue
        raise ValueError(
            "Invalid literal for boolean: %s" % submissionValue)

    def convertValueNodeToHtmlNodes(self, valueNode):
        """ XML node -> list holding either "false" or "true".
        Raises ValueError when the node content is not a boolean literal.
        """
        if self._isTrue(valueNode.content):
            return ["true"]
        return ["false"]

    def convertValueNodeToHtmlAttributeValue(self, valueNode):
        """ XML node -> "false" or "true".
        Raises ValueError when the node content is not a boolean literal.
        """
        if self._isTrue(valueNode.content):
            return "true"
        return "false"

    def convertValueNodeToPlainText(self, valueNode):
        """ XML node -> u"false" or u"true".
        Raises ValueError when the node content is not a boolean literal.
        """
        if self._isTrue(valueNode.content):
            return u"true"
        return u"false"


class Date(AbstractStandardType):
    """ The xsd:date type.
    Dates are stored in ISO format (year-month-day) and displayed as
    day/month/year.
    """
    name = "xsd:date"

    def convertSubmissionValueToContent(self, submissionValue):
        """ Form submitted value -> ISO formatted date.
        Day/month/year and year-month-day inputs are accepted, with years on
        either four or two digits. An empty string gives None.
        Raises ValueError when the submitted value matches none of the formats.
        """
        if submissionValue == "":
            return None
        parsedTime = None
        for pattern in ("%d/%m/%Y", "%d/%m/%y", "%Y-%m-%d", "%y-%m-%d"):
            try:
                parsedTime = time.strptime(submissionValue, pattern)
            except ValueError:
                # Not this format, try the next one.
                continue
            break
        if parsedTime is None:
            raise ValueError("Invalid literal for date: %s" % submissionValue)
        (year, month, day) = parsedTime[:3]
        return datetime.date(year, month, day).isoformat()

    def convertValueNodeToHtmlNodes(self, valueNode):
        """ XML node -> list holding the plain text form of the date.
        """
        plainText = self.convertValueNodeToPlainText(valueNode)
        return [plainText]

    def convertValueNodeToHtmlAttributeValue(self, valueNode):
        """ XML node -> plain text form of the date.
        """
        plainText = self.convertValueNodeToPlainText(valueNode)
        return plainText

    def convertValueNodeToPlainText(self, valueNode):
        """ XML node holding an ISO date -> "day/month/year" string.
        An empty content gives an empty string.
        """
        content = valueNode.content
        if not content:
            return ""
        (year, month, day) = time.strptime(content, "%Y-%m-%d")[:3]
        return "%02d/%02d/%04d" % (day, month, year)

class DateTime(Date):
    """ The xsd:dateTime type.
    Values are stored in ISO format and displayed as
    "day/month/year hours:minutes:seconds".
    """
    name = "xsd:dateTime"

    def convertSubmissionValueToContent(self, submissionValue):
        """ Form submitted value -> ISO formatted date and time.
        Every combination of an accepted date format, a separator (space,
        "T" or nothing) and a time format (with seconds, without seconds or
        no time at all) is tried, against the first 19 characters of the
        submitted value only. An empty string gives None.
        Raises ValueError when no combination matches.
        """
        if submissionValue == "":
            return None
        dateFormats = ("%d/%m/%Y", "%d/%m/%y", "%Y-%m-%d", "%y-%m-%d")
        timeFormats = ("%H:%M:%S", "%H:%M", "")
        separators = (" ", "T", "")
        patterns = [
            "%s%s%s" % (dateFormat, separator, timeFormat)
            for dateFormat in dateFormats
            for timeFormat in timeFormats
            for separator in separators]
        truncatedValue = submissionValue[:19]
        parsedTime = None
        for pattern in patterns:
            try:
                parsedTime = time.strptime(truncatedValue, pattern)
            except ValueError:
                # Not this format, try the next one.
                continue
            break
        if parsedTime is None:
            raise ValueError("Invalid literal for dateTime: %s" % submissionValue)
        (year, month, day, hour, minute, second) = parsedTime[:6]
        return datetime.datetime(year, month, day, hour, minute, second).isoformat()

    def convertValueNodeToPlainText(self, valueNode):
        """ XML node holding an ISO date and time -> "day/month/year hours:minutes:seconds".
        Only the first 19 characters of the content are parsed.
        An empty content gives an empty string.
        """
        content = valueNode.content
        if not content:
            return ""
        parsedTime = time.strptime(content[:19], "%Y-%m-%dT%H:%M:%S")
        (year, month, day, hour, minute, second) = parsedTime[:6]
        return "%02d/%02d/%04d %02d:%02d:%02d" % (day, month, year, hour, minute, second)


class Integer(AbstractStandardType):
    """ The xsd:integer type.
    Every conversion parses the value with int(), which raises ValueError
    for an invalid literal, and gives back its normalized representation.
    """
    name = "xsd:integer"

    def convertSubmissionValueToContent(self, submissionValue):
        """ Form submitted value -> XML storage format.
        """
        return str(int(submissionValue))

    def convertValueNodeToHtmlNodes(self, valueNode):
        """ XML node -> list holding the integer as a string.
        """
        number = int(valueNode.content)
        return [str(number)]

    def convertValueNodeToHtmlAttributeValue(self, valueNode):
        """ XML node -> the integer as a string.
        """
        number = int(valueNode.content)
        return str(number)

    def convertValueNodeToPlainText(self, valueNode):
        """ XML node -> the integer as a unicode string.
        """
        number = int(valueNode.content)
        return unicode(number)


class String(AbstractStandardType):
    """ The xsd:string type. It defines no conversion of its own.
    """
    name = "xsd:string"


# Custom Types


class EmailAddress(String):
    """ An email address, stored without spaces and in lower case.
    """
    name = "yep:emailAddress"

    def convertSubmissionValueToContent(self, submissionValue):
        """ Form submitted value -> XML storage format.
        Spaces are removed and the address is lowercased. An empty address is
        accepted as is. Otherwise the address must hold exactly one "@", no
        forbidden character, a non-empty part before the "@" and, after it, a
        domain made of at least two non-empty dot-separated names.
        Raises ValueError when the address is invalid.
        """
        import re
        value = submissionValue.replace(" ", "").lower()
        if value == "":
            return value
        # re.search() is required here: re.match() would only check the first
        # character of the address.
        if value.count("@") == 1 \
                and not re.search("[][()<>|;^,/\200-\377]", value):
            (localPart, domain) = value.split("@")
            domainNames = domain.split(".")
            if localPart != "" and len(domainNames) >= 2 and "" not in domainNames:
                return value
        raise ValueError("Invalid literal for email address: %s" % submissionValue)


class ObjectType(Type):
    """ Type class used for the types registered with registerType() below.
    """
    def convertValueNodeToHtmlNodes(self, valueNode):
        """ XML node -> list holding the node content.
        """
        # FIXME: To upgrade. No currentActionHandler anymore.
        # The former implementation, kept for reference, built a link to the
        # holder found at the "src" attribute of the node:
##         sourceNodes = environs.getVar("currentActionHandler").evaluateXpath(
##             "@src", valueNode)
##         if sourceNodes:
##             sourceLocation = sourceNodes[0].content
##             sourceHolder = self.walkToLocation(sourceLocation)
##             if sourceHolder is not None:
##                 return [html.a(sourceHolder.simpleLabel,
##                         href = self.constructUri(location))]
        content = valueNode.content
        return [content]

    def convertValueNodeToPlainText(self, valueNode):
        """ XML node -> unicode string made of the UTF-8 decoded HTML nodes.
        """
        fragments = []
        for htmlNode in self.convertValueNodeToHtmlNodes(valueNode):
            fragments.append(htmlNode.decode("UTF-8"))
        return u"".join(fragments)


def getSchemaHolder(schemaAbsolutePath):
    """ Returns the schema holder at schemaAbsolutePath.
    The holder is created on first request, then served from the
    _schemaHolders cache.
    """
    try:
        return _schemaHolders[schemaAbsolutePath]
    except KeyError:
        # Not cached yet: build it below.
        pass
    fileSystem = filesystems.PartialFileSystem(schemaAbsolutePath)
    holder = dataholders.DataHolder(
        pathFragment = schemaAbsolutePath, mimeType = "text/xml", isRootElder = True,
        containedFileSystem = fileSystem)
    _schemaHolders[schemaAbsolutePath] = holder
    return holder


def registerType(name, class_):
    """ Declares "class_" as the class to use for types called "name".
    The association is stored in objectTypeClasses; a previous association
    for the same name is replaced.
    """
    objectTypeClasses[name] = class_


# Types named "Identity" and "Person" are handled by ObjectType.
registerType("Identity", ObjectType)
registerType("Person", ObjectType)

# Register Schema (with SchemaHolder as holder class) for the "schema"
# element of the xsd namespace.
elements.registerElement(
    namespaces.xsd.uri, "schema", Schema, holderClass = SchemaHolder)