This repository has been archived on 2023-02-21. You can view files and clone it, but cannot push or open issues or pull requests.
expression/src/core/xmlschemas.py

703 lines
27 KiB
Python

# -*- coding: UTF-8 -*-
# Expression
# By: Frederic Peters <fpeters@entrouvert.com>
# Emmanuel Raviart <eraviart@entrouvert.com>
#
# Copyright (C) 2004 Entr'ouvert, Frederic Peters & Emmanuel Raviart
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""XML Schema."""
import datetime
import time
import dataholders
import elements
import filesystems
import logs
import namespaces
import stations
# Cache of schema holders keyed by absolute schema path; filled and read by
# getSchemaHolder().
_schemaHolders = {}
# Registry mapping a schema type name to the Python class used for it; filled
# by registerType() and consulted by Schema.newTypeContext().
objectTypeClasses = {}
class Schema(elements.Element):
    """An XML Schema definition.

    Defines the structure of a document, and specifically its field types.
    Types are looked up in this schema first, then in the schemas it
    includes or imports.
    """

    # Result of parsing this schema with libxml2; set by __init__ and used by
    # validateDocument() and validateElement().
    _schemaContext = None

    def __init__(self, *a, **b):
        """Build the element, then parse its document as a libxml2 schema.

        All arguments are forwarded unchanged to the parent constructor.
        """
        super(Schema, self).__init__(*a, **b)
        parserContext = self.node.doc.schemaNewDocParserCtxt()
        self._schemaContext = parserContext.schemaParse()

    def buildXformsModelContext(self, context):
        """Register this schema in the SchemasContext of "context".

        The SchemasContext and its list of schemas are created on first use.
        """
        if context.schemas is None:
            context.schemas = SchemasContext(
                context.specimen, previous=context, uriPathFragment="schemas")
        schemasContext = context.schemas
        if schemasContext.schemas is None:
            schemasContext.schemas = []
        schemasContext.schemas.append(self)

    def getAttributeInNodeType(self, value, node, name):
        """Return a TypeContext for attribute "name" defined as a child of "node".

        Argument "value" is a ModelContext to be used as the context specimen.
        Returns None when "node" has no such attribute definition.
        """
        attributeNode = self.getAttributeNodeInNode(node, name)
        if attributeNode is None:
            return None
        return self.getNodeType(value, attributeNode)

    def getAttributeNodeInNode(self, node, name):
        """Return the first matching "xsd:attribute" child of "node", or None."""
        xpath = "xsd:attribute[@name = '%s']" % name.replace("'", "&apos;")
        for attributeNode in self.evaluateXpath(xpath, node):
            return attributeNode
        return None

    def getNodeType(self, value, node):
        """Return a TypeContext for the element or attribute defined at "node".

        Argument "value" is a ModelContext to be used as the context specimen.
        Handles both type names (type="...") and embedded type definitions.
        Returns None when neither is present.
        """
        typeName = node.prop("type")
        if typeName:
            return self.getGlobalType(value, typeName)
        # No type name: use the first child as an embedded type definition.
        for typeNode in self.evaluateXpath("*", node):
            return self.newTypeContext(value, typeNode)
        return None

    def getElementInNodeType(self, value, node, name):
        """Return a TypeContext for element "name" defined as a child of "node".

        Argument "value" is a ModelContext to be used as the context specimen.
        Returns None when "node" has no such element definition.
        """
        elementNode = self.getElementNodeInNode(node, name)
        if elementNode is None:
            return None
        return self.getNodeType(value, elementNode)

    def getElementNodeInNode(self, node, name):
        """Return the first "xsd:element" child of "node" named "name", or None."""
        xpath = "xsd:element[@name = '%s']" % name.replace("'", "&apos;")
        for elementNode in self.evaluateXpath(xpath, node):
            return elementNode
        return None

    def getGlobalElementNode(self, name, scannedSchemas=None):
        """Return the definition node of element "name", or None.

        The element is sought in this schema first, then in every included
        or imported schema.
        Argument scannedSchemas is used internally to avoid infinite
        recursion between schemas that include each other.
        """
        if scannedSchemas is None:
            scannedSchemas = []
        elif self in scannedSchemas:
            return None
        scannedSchemas.append(self)
        elementNode = self.getElementNodeInNode(self.node, name)
        if elementNode is not None:
            return elementNode
        for schemaLocation in self.getIncludeLocations():
            schema = self.getSchemaAtLocation(schemaLocation)
            if schema is None:
                # Missing schema file (already logged): try the next one.
                continue
            elementNode = schema.getGlobalElementNode(
                name, scannedSchemas=scannedSchemas)
            # Keep looking in the remaining schemas when this one does not
            # define the element.
            if elementNode is not None:
                return elementNode
        return None

    def getGlobalElementType(self, value, name, scannedSchemas=None):
        """Return a TypeContext for the global element "name", or None.

        Argument "value" is a ModelContext to be used as the context specimen.
        Argument scannedSchemas is used internally to avoid infinite
        recursion.
        """
        elementNode = self.getGlobalElementNode(
            name, scannedSchemas=scannedSchemas)
        if elementNode is None:
            return None
        return self.getNodeType(value, elementNode)

    def getGlobalType(self, value, name, scannedSchemas=None):
        """Return a TypeContext for the type called "name", or None.

        Argument "value" is a ModelContext to be used as the context specimen.
        Argument scannedSchemas is used internally to avoid infinite
        recursion.
        """
        # Remember that I was here and avoid infinite recursive loop.
        if scannedSchemas is None:
            scannedSchemas = []
        elif self in scannedSchemas:
            return None
        scannedSchemas.append(self)
        # See whether we know this type as a standard xsd type.
        # FIXME.
        if name.startswith("xsd:") or name == EmailAddress.name:
            typeClasses = {
                "xsd:boolean": Boolean,
                "xsd:date": Date,
                "xsd:dateTime": DateTime,
                "xsd:integer": Integer,
                "xsd:string": String,
                "xsd:anyURI": String,
                EmailAddress.name: EmailAddress,
            }
            if name in typeClasses:
                typeClass = typeClasses[name]
            else:
                # Unsupported standard types are handled as plain strings.
                logs.debug("""Using class "String" for type "%s".""" % name)
                typeClass = String
            return TypeContext(typeClass(), value, previous=value)
        # Seek the definition of type "name" within me.
        globalType = self.getTypeInNode(value, self.node, name)
        if globalType is not None:
            return globalType
        # Recurse in included schemas.
        for schemaLocation in self.getIncludeLocations():
            schema = self.getSchemaAtLocation(schemaLocation)
            if schema is None:
                # Missing schema file (already logged): try the next one.
                continue
            globalType = schema.getGlobalType(
                value, name, scannedSchemas=scannedSchemas)
            if globalType is not None:
                return globalType
        return None

    def getIncludeLocations(self):
        """Return the list of included and imported schema locations."""
        locationNodes = self.evaluateXpath(
            "(xsd:include|xsd:import)/@schemaLocation")
        return [locationNode.content for locationNode in locationNodes]

    def getSchemaAtLocation(self, schemaLocation):
        """Return the schema found at location schemaLocation, or None.

        None is returned (and a debug message logged) when the location
        cannot be converted to an absolute path or no holder is found.
        """
        # Do not use walkToLocation, convert to absolute path and get it.
        schemaAbsolutePath = self.convertRelativeLocationToAbsolutePath(
            schemaLocation)
        if schemaAbsolutePath is None:
            logs.debug('Missing schema file "%s".' % schemaLocation)
            return None
        schemaHolder = getSchemaHolder(schemaAbsolutePath)
        if schemaHolder is None:
            logs.debug('Missing schema file "%s".' % schemaLocation)
            return None
        return schemaHolder.getRootElement()

    def getTargetNamespace(self):
        """Return the target namespace URI (the "targetNamespace" property)."""
        return self.node.prop("targetNamespace")

    def getTypeInNode(self, value, node, name):
        """Return a TypeContext for the definition called "name", or None.

        Argument "value" is a ModelContext to be used as the context specimen.
        Argument "node" is the context node (where to look for the
        declaration). The name is tried as given first, then without the
        prefix of the target namespace.
        """
        name = name.replace("'", "&apos;")
        for typeNode in self.evaluateXpath("*[@name = '%s']" % name, node):
            return self.newTypeContext(value, typeNode)
        targetNamespacePrefix = namespaces.getName(self.getTargetNamespace())
        if name[:len(targetNamespacePrefix) + 1] == "%s:" % targetNamespacePrefix:
            name = name[len(targetNamespacePrefix) + 1:]
            for typeNode in self.evaluateXpath("*[@name = '%s']" % name, node):
                return self.newTypeContext(value, typeNode)
        return None

    def newContext(self, specimen, *attributes, **keywords):
        """Return a SchemaContext having this schema as first argument."""
        return SchemaContext(self, specimen, *attributes, **keywords)

    def newTypeContext(self, value, node):
        """Create and return a TypeContext.

        Its prototype is a Type element built around argument "node".
        Its specimen is the ModelContext "value" argument.
        """
        # FIXME: Handle other namespaces.
        typeElement = Type(node, previous=self, owner=self)
        name = typeElement.name
        if name in objectTypeClasses:
            typeClass = objectTypeClasses[name]
        else:
            typeClass = Type
        if typeClass != Type:
            # Switch the instance to the class registered for this type name.
            typeElement.__class__ = typeClass
        return TypeContext(typeElement, value, previous=value)

    def validateDocument(self, doc, options=0):
        """Validate a libxml2 document. Returns True on pass."""
        validationContext = self._schemaContext.schemaNewValidCtxt()
        validationContext.schemaSetValidOptions(options)
        return doc.schemaValidateDoc(validationContext) == 0

    def validateElement(self, node, options=0):
        """Validate a libxml2 element node. Returns True on pass."""
        validationContext = self._schemaContext.schemaNewValidCtxt()
        validationContext.schemaSetValidOptions(options)
        return node.schemaValidateOneElement(validationContext) == 0
class SchemaContext(stations.AbstractContext):
    """A context around a schema.

    Schema.newContext() builds it with the schema as first constructor
    argument; element type lookups are forwarded to the prototype.
    """

    def getGlobalElementType(self, value, name, scannedSchemas=None):
        """Forward the lookup of global element "name" to the prototype."""
        prototype = self.prototype
        return prototype.getGlobalElementType(
            value, name, scannedSchemas=scannedSchemas)
class SchemaHolder(dataholders.XmlHolder):
    """Holder for an XML Schema (.xsd) file."""

    # File name extension used by default for this kind of holder.
    defaultFileNameExtension = ".xsd"
class SchemasContext(stations.AbstractContext):
    """A context holding the list of schemas known to a model.

    It has no separate prototype: it acts as its own prototype.
    """

    # List of Schema objects, or None while no schema has been registered
    # (see Schema.buildXformsModelContext).
    schemas = None

    def __init__(self, specimen, previous=None, uriPathFragment=None):
        """Initialize the context with no prototype (None is passed)."""
        super(SchemasContext, self).__init__(
            None, specimen, previous=previous,
            uriPathFragment=uriPathFragment)

    def getValueType(self, value):
        """Return a TypeContext for the element of "value", or None.

        The name of value.node is looked up as a global element in each
        registered schema, in order; the first match wins.
        """
        if self.schemas is None:
            return None
        elementName = value.node.name
        # Shared between all lookups so that a schema reachable from several
        # registered schemas is searched only once. Each Schema records
        # itself in this list, so nothing has to be appended here.
        scannedSchemas = []
        for schema in self.schemas:
            elementType = schema.getGlobalElementType(
                value, elementName, scannedSchemas=scannedSchemas)
            if elementType is not None:
                return elementType
        return None

    def getPrototype(self):
        """Return this context itself."""
        return self

    prototype = property(getPrototype)
class AbstractType:
    """Base class of all type classes.

    Field values are manipulated through the methods of their type class.
    Integer, String, etc. derive from it, as do custom type classes
    (Identity, Session etc.). The default conversions pass data through
    unchanged.
    """

    def convertSubmissionValueToContent(self, submissionValue):
        """Convert a form-submitted value to its XML storage format."""
        return submissionValue

    def convertValueNodeToHtmlNodes(self, valueNode):
        """Convert an XML node to a list of HTML nodes."""
        content = valueNode.content
        return [content]

    def convertValueNodeToHtmlAttributeValue(self, valueNode):
        """Convert an XML node to an HTML attribute value."""
        content = valueNode.content
        return content

    def convertValueNodeToPlainText(self, valueNode):
        """Convert an XML node to plain text, decoding its content as UTF-8."""
        content = valueNode.content
        return content.decode("UTF-8")
class Type(AbstractType, elements.Element):
    """A non-standard type, defined by a node of an XML Schema.

    On top of the conversions inherited from AbstractType, it resolves the
    types of its attributes and child elements.
    """

    def getAttributeType(self, value, attributeName):
        """Return a TypeContext for attribute "attributeName", or None.

        Argument "value" is a ModelContext to be used as the context specimen.
        """
        # FIXME: Works only for:
        # - xsd:complexType/xsd:attribute
        # - xsd:complexType/xsd:complexContent/xsd:extension/xsd:attribute
        schema = self.getSchema()
        attributeType = schema.getAttributeInNodeType(
            value, self.node, attributeName)
        if attributeType is not None:
            return attributeType
        complexContentNodes = self.evaluateXpath("xsd:complexContent")
        if not complexContentNodes:
            return None
        extensionNodes = self.evaluateXpath(
            "xsd:extension", complexContentNodes[0])
        if not extensionNodes:
            return None
        extensionNode = extensionNodes[0]
        # First look for the attribute type in base type.
        baseTypeNameNodes = self.evaluateXpath("@base", extensionNode)
        if not baseTypeNameNodes:
            return None
        baseTypeName = baseTypeNameNodes[0].content
        baseType = schema.getGlobalType(value, baseTypeName)
        if baseType is None:
            return None
        attributeType = baseType.prototype.getAttributeType(
            value, attributeName)
        if attributeType is not None:
            return attributeType
        # Then look for the attribute type in extension.
        return schema.getAttributeInNodeType(
            value, extensionNode, attributeName)

    def getChildType(self, value, childName):
        """Return a TypeContext for element "childName", or None.

        Argument "value" is a ModelContext to be used as the context specimen.
        "childName" may be an alternation such as "ns:a|ns:b"; the first
        alternative with a known type wins.
        """
        # FIXME: Works only for:
        # - xsd:complexType/xsd:sequence
        # - xsd:complexType/xsd:complexContent/xsd:extension/xsd:sequence
        # - xsd:complexType/xsd:complexContent/xsd:extension/xsd:sequence/xsd:choice
        # - xsd:complexType/xsd:complexContent/xsd:extension/xsd:sequence/xsd:group/@ref -> xsd:group[@ref]/xsd:sequence/xsd:element
        if "|" in childName:
            for alternative in childName.split("|"):
                childType = self.getChildType(value, alternative)
                if childType is not None:
                    return childType
            # Every alternative has been searched (base types included):
            # searching again with the unsplit name cannot find anything.
            return None
        # Remove the namespace prefix.
        if ":" in childName:
            childName = childName[childName.index(":") + 1:]
        schema = self.schema
        sequenceNodes = self.evaluateXpath("xsd:sequence")
        if sequenceNodes:
            return schema.getElementInNodeType(
                value, sequenceNodes[0], childName)
        complexContentNodes = self.evaluateXpath("xsd:complexContent")
        if not complexContentNodes:
            return None
        extensionNodes = self.evaluateXpath(
            "xsd:extension", complexContentNodes[0])
        if not extensionNodes:
            return None
        extensionNode = extensionNodes[0]
        # First look for the child type in base type.
        baseTypeName = extensionNode.prop("base")
        if baseTypeName is None:
            return None
        baseType = schema.getGlobalType(value, baseTypeName)
        if baseType is None:
            return None
        childType = baseType.prototype.getChildType(value, childName)
        if childType is not None:
            return childType
        # Then look for the child type in extension.
        for sequenceNode in self.evaluateXpath("xsd:sequence", extensionNode):
            elementType = schema.getElementInNodeType(
                value, sequenceNode, childName)
            if elementType is not None:
                return elementType
            for choiceNode in self.evaluateXpath("xsd:choice", sequenceNode):
                elementType = schema.getElementInNodeType(
                    value, choiceNode, childName)
                if elementType is not None:
                    return elementType
            for groupNode in self.evaluateXpath("xsd:group", sequenceNode):
                ref = groupNode.prop("ref")
                if not ref:
                    continue
                # Follow the reference to the group defined in the schema.
                # NOTE(review): "ref" is used as is; a namespace-prefixed
                # reference presumably never matches @name -- confirm.
                groupSequenceNodes = schema.evaluateXpath(
                    "xsd:group[@name=\"%s\"]/xsd:sequence" % ref)
                if groupSequenceNodes:
                    elementType = schema.getElementInNodeType(
                        value, groupSequenceNodes[0], childName)
                    if elementType is not None:
                        return elementType
        return None

    def getName(self):
        """Return the name of this Type (the "name" property of its node)."""
        return self.node.prop("name")

    def getSchema(self):
        """Return the schema where this Type is defined (its parent)."""
        return self.getParent()

    def getTypeAtXpath(self, value, xpath):
        """Return the type of the element or attribute at "xpath".

        The path is relative to this type and resolved one step at a time.
        Argument "value" is a ModelContext; its specimen's specimen is used
        to evaluate steps containing alternatives. An empty or "." path
        returns this Type itself; otherwise a TypeContext is returned, an
        xsd:string one (with an info message logged) when a step has no
        known type.
        """
        if not xpath or xpath == ".":
            return self
        splitedXpath = xpath.split("/", 1)
        name = splitedXpath[0]
        assert name
        if "[" in name:
            if name[0] == "(":
                # Case of a "(ns:a|ns:b|ns:c)[3]" expression: try finding
                # which of ns:a, ns:b and ns:c is the right one to look for.
                for node in value.specimen.specimen.evaluateXpath(name):
                    name = node.name
                    # FIXME: prefix name with namespace name
                    break
            else:
                # Drop the predicate: "ns:a[3]" -> "ns:a".
                name = name[:name.index("[")]
        elif "|" in name:
            # Case of a "ns:a|ns:b|ns:c" expression: try finding which of
            # ns:a, ns:b and ns:c is the right one to look for.
            for node in value.specimen.specimen.evaluateXpath(name):
                name = node.name
                break
        if name[0] == "@":
            # Attribute
            subType = self.getAttributeType(value, name[1:])
        else:
            # Child
            subType = self.getChildType(value, name)
        if subType is None:
            logs.info(
                """Unknown type for %s in "%s". Using xsd:string instead."""
                % (name, self.name))
            return TypeContext(String(), value, previous=value)
        if len(splitedXpath) > 1:
            remainingXpath = splitedXpath[1]
            if remainingXpath:
                return subType.getTypeAtXpath(value, remainingXpath)
        return subType

    name = property(getName)
    schema = property(getSchema)
class TypeContext(stations.AbstractContext):
    """Tools to manipulate a value depending on its type.

    Every method below forwards to the same-named method of the prototype,
    so that a TypeContext can be used in place of its Type.
    """

    def convertSubmissionValueToContent(self, submissionValue):
        """Forward to the prototype (form value -> XML storage format)."""
        target = self.prototype
        return target.convertSubmissionValueToContent(submissionValue)

    def convertValueNodeToHtmlNodes(self, valueNode):
        """Forward to the prototype (XML node -> list of HTML nodes)."""
        target = self.prototype
        return target.convertValueNodeToHtmlNodes(valueNode)

    def convertValueNodeToHtmlAttributeValue(self, valueNode):
        """Forward to the prototype (XML node -> HTML attribute value)."""
        target = self.prototype
        return target.convertValueNodeToHtmlAttributeValue(valueNode)

    def convertValueNodeToPlainText(self, valueNode):
        """Forward to the prototype (XML node -> plain text)."""
        target = self.prototype
        return target.convertValueNodeToPlainText(valueNode)

    def getTypeAtXpath(self, value, xpath):
        """Forward to the prototype (type lookup along "xpath")."""
        target = self.prototype
        return target.getTypeAtXpath(value, xpath)
# Standard Types
class AbstractStandardType(AbstractType):
    """Base class for the known XML Schema standard types (string, integer...)."""

    # Qualified name of the type (e.g. "xsd:string"). To override.
    name = None
class Boolean(AbstractStandardType):
    """The xsd:boolean type.

    Accepts the literals "0", "1", "false" and "true"; values read from XML
    are rendered as "false" or "true".
    """

    name = "xsd:boolean"

    def _canonicalLiteral(self, valueNode):
        """Return "false" or "true" for the content of valueNode.

        Raises ValueError when the content is not a boolean literal.
        """
        content = valueNode.content
        if content in ("0", "false"):
            return "false"
        if content in ("1", "true"):
            return "true"
        raise ValueError("Invalid literal for boolean: %s" % content)

    def convertSubmissionValueToContent(self, submissionValue):
        """Check a submitted value and return it unchanged.

        None (what unchecked checkboxes return) is stored as "0".
        Raises ValueError for anything that is not a boolean literal.
        """
        if submissionValue is None:
            submissionValue = "0"
        if submissionValue not in ("0", "1", "false", "true"):
            raise ValueError(
                "Invalid literal for boolean: %s" % submissionValue)
        return submissionValue

    def convertValueNodeToHtmlNodes(self, valueNode):
        """Return ["false"] or ["true"]; raises ValueError otherwise."""
        return [self._canonicalLiteral(valueNode)]

    def convertValueNodeToHtmlAttributeValue(self, valueNode):
        """Return "false" or "true"; raises ValueError otherwise."""
        return self._canonicalLiteral(valueNode)

    def convertValueNodeToPlainText(self, valueNode):
        """Return u"false" or u"true"; raises ValueError otherwise."""
        if self._canonicalLiteral(valueNode) == "true":
            return u"true"
        return u"false"
class Date(AbstractStandardType):
    """The xsd:date type.

    Stored in ISO format (YYYY-MM-DD), displayed as DD/MM/YYYY.
    """

    name = "xsd:date"

    def convertSubmissionValueToContent(self, submissionValue):
        """Parse a submitted date and return it in ISO format.

        An empty string yields None. Day-first ("/"-separated) and
        year-first ("-"-separated) notations are tried in turn, with
        4-digit then 2-digit years. Raises ValueError when none matches.
        """
        if submissionValue == "":
            return None
        parsedTime = None
        for pattern in ("%d/%m/%Y", "%d/%m/%y", "%Y-%m-%d", "%y-%m-%d"):
            try:
                parsedTime = time.strptime(submissionValue, pattern)
            except ValueError:
                continue
            break
        if parsedTime is None:
            raise ValueError("Invalid literal for date: %s" % submissionValue)
        (year, month, day) = parsedTime[:3]
        return datetime.date(year, month, day).isoformat()

    def convertValueNodeToHtmlNodes(self, valueNode):
        """Return the plain text form of the date, in a one-item list."""
        plainText = self.convertValueNodeToPlainText(valueNode)
        return [plainText]

    def convertValueNodeToHtmlAttributeValue(self, valueNode):
        """Return the plain text form of the date."""
        return self.convertValueNodeToPlainText(valueNode)

    def convertValueNodeToPlainText(self, valueNode):
        """Return the stored ISO date as DD/MM/YYYY, or "" when empty."""
        content = valueNode.content
        if not content:
            return ""
        parsedTime = time.strptime(content, "%Y-%m-%d")
        (year, month, day) = parsedTime[:3]
        return "%02d/%02d/%04d" % (day, month, year)
class DateTime(Date):
    """The xsd:dateTime type.

    Stored in ISO format (YYYY-MM-DDTHH:MM:SS), displayed as
    DD/MM/YYYY HH:MM:SS.
    """

    name = "xsd:dateTime"

    def convertSubmissionValueToContent(self, submissionValue):
        """Parse a submitted date and time and return it in ISO format.

        An empty string yields None. Only the first 19 characters of the
        submitted value are parsed. Every combination of date notation,
        separator (" ", "T" or none) and time notation (with seconds,
        without seconds or none) is tried in turn. Raises ValueError when
        none matches.
        """
        if submissionValue == "":
            return None
        dateFormats = ("%d/%m/%Y", "%d/%m/%y", "%Y-%m-%d", "%y-%m-%d")
        timeFormats = ("%H:%M:%S", "%H:%M", "")
        separators = (" ", "T", "")
        patterns = [
            "%s%s%s" % (dateFormat, separator, timeFormat)
            for dateFormat in dateFormats
            for timeFormat in timeFormats
            for separator in separators
        ]
        parsedTime = None
        for pattern in patterns:
            try:
                parsedTime = time.strptime(submissionValue[:19], pattern)
            except ValueError:
                continue
            break
        if parsedTime is None:
            raise ValueError(
                "Invalid literal for dateTime: %s" % submissionValue)
        (year, month, day, hour, minute, second) = parsedTime[:6]
        moment = datetime.datetime(year, month, day, hour, minute, second)
        return moment.isoformat()

    def convertValueNodeToPlainText(self, valueNode):
        """Return the stored value as DD/MM/YYYY HH:MM:SS, or "" when empty.

        Only the first 19 characters of the content are parsed.
        """
        content = valueNode.content
        if not content:
            return ""
        parsedTime = time.strptime(content[:19], "%Y-%m-%dT%H:%M:%S")
        (year, month, day, hour, minute, second) = parsedTime[:6]
        return "%02d/%02d/%04d %02d:%02d:%02d" % (
            day, month, year, hour, minute, second)
class Integer(AbstractStandardType):
    """The xsd:integer type.

    Every conversion goes through int(), so an invalid literal raises
    whatever int() raises.
    """

    name = "xsd:integer"

    def convertSubmissionValueToContent(self, submissionValue):
        """Return the submitted value as a normalized integer string."""
        return str(int(submissionValue))

    def convertValueNodeToHtmlNodes(self, valueNode):
        """Return the integer as a string, in a one-item list."""
        return [str(int(valueNode.content))]

    def convertValueNodeToHtmlAttributeValue(self, valueNode):
        """Return the integer as a string."""
        return str(int(valueNode.content))

    def convertValueNodeToPlainText(self, valueNode):
        """Return the integer as a unicode string."""
        return unicode(int(valueNode.content))
class String(AbstractStandardType):
    """The xsd:string type; adds no conversion of its own."""

    name = "xsd:string"
# Custom Types
class EmailAddress(String):
    """An email address, stored lowercased and without spaces."""

    name = "yep:emailAddress"

    def convertSubmissionValueToContent(self, submissionValue):
        """Normalize and check a submitted email address.

        Spaces are removed and the address is lowercased. An empty result
        is returned as is. Otherwise the address must hold exactly one "@",
        no forbidden character, a non-empty local part and a domain made of
        at least two non-empty dot-separated names.
        Raises ValueError when the address is invalid.
        """
        value = submissionValue.replace(" ", "").lower()
        if value == "":
            return value
        if value.count("@") == 1:
            import re
            # search(), not match(): a forbidden character must be rejected
            # wherever it appears, not only at the start of the address.
            if not re.search("[][()<>|;^,/\200-\377]", value):
                (localPart, domain) = value.split("@")
                domainNames = domain.split(".")
                if (localPart and len(domainNames) >= 2
                        and "" not in domainNames):
                    return value
        raise ValueError(
            "Invalid literal for email address: %s" % submissionValue)
class ObjectType(Type):
    """Type class registered for the "Identity" and "Person" type names."""

    def convertValueNodeToHtmlNodes(self, valueNode):
        """Return the content of valueNode, in a one-item list."""
        # FIXME: To upgrade. No currentActionHandler anymore.
        ## sourceNodes = environs.getVar("currentActionHandler").evaluateXpath(
        ##     "@src", valueNode)
        ## if sourceNodes:
        ##     sourceLocation = sourceNodes[0].content
        ##     sourceHolder = self.walkToLocation(sourceLocation)
        ##     if sourceHolder is not None:
        ##         return [html.a(sourceHolder.simpleLabel,
        ##                        href = self.constructUri(location))]
        content = valueNode.content
        return [content]

    def convertValueNodeToPlainText(self, valueNode):
        """Return the HTML nodes of valueNode, UTF-8 decoded and joined."""
        decodedNodes = []
        for htmlNode in self.convertValueNodeToHtmlNodes(valueNode):
            decodedNodes.append(htmlNode.decode("UTF-8"))
        return u"".join(decodedNodes)
def getSchemaHolder(schemaAbsolutePath):
    """Return the schema holder at schemaAbsolutePath, caching it.

    The holder is built on the first request for a path and kept in the
    module-level _schemaHolders cache; later requests return the same
    object.
    """
    try:
        return _schemaHolders[schemaAbsolutePath]
    except KeyError:
        pass
    newHolder = dataholders.DataHolder(
        pathFragment=schemaAbsolutePath,
        mimeType="text/xml",
        isRootElder=True,
        containedFileSystem=filesystems.PartialFileSystem(schemaAbsolutePath))
    _schemaHolders[schemaAbsolutePath] = newHolder
    return newHolder
def registerType(name, class_):
    """Register class_ as the class to use for the schema type "name".

    A class already registered under the same name is replaced.
    """
    objectTypeClasses[name] = class_
# Use ObjectType for the schema types named "Identity" and "Person".
registerType("Identity", ObjectType)
registerType("Person", ObjectType)
# Register Schema, with SchemaHolder as holder class, for the "schema" element
# of the xsd namespace.
elements.registerElement(
namespaces.xsd.uri, "schema", Schema, holderClass = SchemaHolder)