pdf: add support for radio buttons (#75373)

This commit is contained in:
Benjamin Dauvergne 2023-03-13 23:08:17 +01:00
parent 11dcc6c2e7
commit 52356c9814
6 changed files with 102 additions and 14 deletions

View File

@ -40,6 +40,10 @@ class FieldsMappingEditForm(forms.ModelForm):
elif field.widget_type == 'text':
help_text = _('text template')
field_class = TemplateField
elif field.widget_type == 'radio':
values = ', '.join('"%s"' % value for value in field.radio_possible_values)
help_text = _('text template, possibles values %s') % values
field_class = TemplateField
else:
continue
label = _('field {number} ({help_text})').format(number=i + 1, help_text=help_text)

View File

@ -250,6 +250,9 @@ class Resource(BaseResource):
value = evaluate_condition(mapping_template, post_data)
elif field.widget_type == 'text':
value = evaluate_template(mapping_template, post_data)
elif field.widget_type == 'radio':
value = evaluate_template(mapping_template, post_data)
self.logger.info('field=%r value=%r', field, value)
else:
raise NotImplementedError
if value is not None:

View File

@ -59,7 +59,7 @@ class PageThumbnailView(ResourceView):
thumbnail = page.thumbnail_png()
image = PIL.Image.open(io.BytesIO(thumbnail))
draw = PIL.ImageDraw.Draw(image, 'RGBA')
for i, (field, area_rect) in enumerate(page.thumbnail_field_rects()):
for i, field, area_rect in page.thumbnail_field_rects():
draw.rectangle(area_rect, fill=(255, 0, 0, 50))
x = area_rect.x1
y = (area_rect.y1 + area_rect.y2) / 2 - 5

View File

@ -35,6 +35,26 @@ class Rect(typing.NamedTuple):
x2: float
y2: float
@classmethod
def from_pdf_annotation(cls, annotation):
    """Alternate constructor building an instance from a PDF annotation.

    Every item of ``annotation.Rect`` is converted to ``float`` and the
    results are passed positionally to the class.
    """
    coordinates = [float(coordinate) for coordinate in annotation.Rect]
    return cls(*coordinates)
def rect_compare(rect1, rect2):
    """Three-way comparison ordering rects top to bottom, then left to right.

    The PDF origin is in the bottom-left corner, so a larger ``y`` is higher
    on the page. Rects whose vertical extents overlap (same horizontal band)
    are considered to be at the same height and are ordered by ``x1`` only.

    NOTE(review): assumes normalised rects, i.e. ``y1`` is the bottom edge and
    ``y2`` the top edge (``y1 <= y2``) -- confirm against the PDF producers.

    Returns -1 if ``rect1`` sorts first, 1 if ``rect2`` sorts first, else 0.
    Intended for use with ``functools.cmp_to_key``.
    """
    # rect1 lies entirely above rect2: its bottom edge is over rect2's top.
    if rect1.y1 > rect2.y2:
        return -1
    # Mirror case: rect2 lies entirely above rect1. Comparing against
    # rect1.y2 (not rect1.y1) keeps the comparator antisymmetric, so that
    # cmp(a, b) == -cmp(b, a) for rects sharing a horizontal band.
    if rect2.y1 > rect1.y2:
        return 1
    # Same band: order left to right.
    if rect1.x1 < rect2.x1:
        return -1
    if rect1.x1 > rect2.x1:
        return 1
    return 0
class FieldFlags(int):
@property
@ -71,6 +91,12 @@ class Widget:
@property
def widget_type(self):
if (
self.field_type == pdfrw.PdfName.Btn
and self.field_flags.is_radio
and not self.field_flags.is_push_button
):
return 'radio'
elif (
self.field_type == pdfrw.PdfName.Btn
and not self.field_flags.is_radio
and not self.field_flags.is_push_button
@ -87,7 +113,10 @@ class Widget:
@property
def rects(self):
    """List of ``Rect`` areas occupied by this widget.

    A radio field yields one rect per kid annotation, in the order given by
    ``kids_ordered_by_rect``; any other widget yields a single rect built
    from its own annotation.
    """
    # The former unconditional single-rect return was dropped: it made the
    # radio branch unreachable.
    if self.widget_type == 'radio':
        return [Rect.from_pdf_annotation(kid) for kid in self.kids_ordered_by_rect]
    return [Rect.from_pdf_annotation(self.annotation)]
@property
def digest_id(self):
@ -111,6 +140,22 @@ class Widget:
true_values.remove(pdfrw.PdfName.Off)
return true_values[0]
@property
def kids_ordered_by_rect(self):
    """Kid annotations of a radio field, sorted by their rects.

    Ordering is delegated to ``rect_compare`` applied to the ``Rect`` built
    from each kid. A missing ``Kids`` entry gives an empty list.
    """
    assert self.widget_type == 'radio'

    def kid_order(first, second):
        first_rect = Rect.from_pdf_annotation(first)
        second_rect = Rect.from_pdf_annotation(second)
        return rect_compare(first_rect, second_rect)

    return sorted(self.annotation.Kids or [], key=functools.cmp_to_key(kid_order))
@property
def radio_possible_values(self):
    """Values selectable for this radio field, in ``kids_ordered_by_rect`` order.

    For each kid having a normal appearance dictionary (``AP.N``), the value
    is the first appearance state name other than ``Off``, with its leading
    ``/`` removed. Kids without ``AP``/``AP.N``, or exposing only the ``Off``
    state, contribute nothing.
    """
    assert self.widget_type == 'radio'
    values = []
    for kid in self.kids_ordered_by_rect:
        if not (kid.AP and kid.AP.N):
            continue
        # Key order in the appearance dictionary is not significant, so the
        # "Off" state may come first; it is never a selectable value.
        names = (state[1:] for state in kid.AP.N.keys())
        on_name = next((name for name in names if name != 'Off'), None)
        if on_name is not None:
            values.append(on_name)
    return values
@property
def value(self):
if self.widget_type == 'text':
@ -119,6 +164,8 @@ class Widget:
return ''
elif self.widget_type == 'checkbox':
return self.annotation.V == self.checkbox_true_value
elif self.widget_type == 'radio':
return self.annotation.V.lstrip('/') if self.annotation.V else None
def set(self, value):
# allow rendering of values in Acrobat Reader
@ -129,6 +176,16 @@ class Widget:
elif self.widget_type == 'checkbox':
bool_value = self.checkbox_true_value if value else pdfrw.PdfName.Off
self.annotation.update(pdfrw.PdfDict(V=bool_value, AS=bool_value))
elif self.widget_type == 'radio':
if value not in self.radio_possible_values:
raise ValueError(f'"{value}" is not one of {self.radio_possible_values}')
radio_value = pdfrw.PdfName(value)
for kid in self.annotation.Kids:
if kid.AP and kid.AP.N and radio_value in kid.AP.N:
kid.update(pdfrw.PdfDict(AS=radio_value))
else:
kid.update(pdfrw.PdfDict(AS=pdfrw.PdfName.Off))
self.annotation.update(pdfrw.PdfDict(V=radio_value))
@classmethod
def from_pdf_widget(cls, page, pdf_widget):
@ -139,6 +196,9 @@ class Widget:
return None
return widget
def __repr__(self):
    """Debug representation showing the widget's name and widget type."""
    name = self.name
    widget_type = self.widget_type
    return f'<Widget {name!r} : {widget_type}>'
@dataclasses.dataclass
class Page:
@ -156,6 +216,7 @@ class Page:
def widgets():
'''Find annotation which are widgets, if Subtype is not defined,
look at the parent (case of radio fields)'''
seen = set()
for annotation in self.page.Annots or ():
field = annotation
if field.Subtype != pdfrw.PdfName.Widget:
@ -165,6 +226,11 @@ class Page:
# skip field without name
if not field.T:
continue
# the buttons of a radio group share the same parent field: yield each
# field name only once to prevent duplicate fields
if field.T in seen:
continue
seen.add(field.T)
yield field
fields = []
@ -172,7 +238,11 @@ class Page:
widget = Widget.from_pdf_widget(self, widget)
if widget:
fields.append(widget)
fields.sort(key=lambda field: (-field.rect[1], field.rect[0]))
def compare(field1, field2):
return rect_compare(field1.rect, field2.rect)
fields.sort(key=functools.cmp_to_key(compare))
return fields
@property
@ -211,20 +281,20 @@ class Page:
media_height = media_box.y2 - media_box.y1
height = int(width / media_width * media_height)
for field in self.fields:
field_rect = field.rect
yield field, Rect(
# PDF coordinates origin is in the bottom-left corner but img
# tag origin is in the top-left corner
x1=int((field_rect.x1 - media_box.x1) / media_width * width),
y1=int((media_box.y2 - field_rect.y1) / media_height * height),
x2=int((field_rect.x2 - media_box.x1) / media_width * width),
y2=int((media_box.y2 - field_rect.y2) / media_height * height),
)
for i, field in enumerate(self.fields):
for field_rect in field.rects:
yield i, field, Rect(
# PDF coordinates origin is in the bottom-left corner but img
# tag origin is in the top-left corner
x1=int((field_rect.x1 - media_box.x1) / media_width * width),
y1=int((media_box.y2 - field_rect.y1) / media_height * height),
x2=int((field_rect.x2 - media_box.x1) / media_width * width),
y2=int((media_box.y2 - field_rect.y2) / media_height * height),
)
def fields_image_map(self, width=None, sep='\n', id_prefix='', id_suffix=''):
tags = []
for field, area_rect in self.thumbnail_field_rects(width=width):
for _, field, area_rect in self.thumbnail_field_rects(width=width):
coords = ','.join(map(str, area_rect))
tags.append(
f'<area shape="rect" '

Binary file not shown.

View File

@ -93,3 +93,14 @@ def test_field_set(pdf):
else:
raise NotImplementedError
assert check == {1, 2}
def test_radio_button():
    """The Gender radio field of the sample PDF lists H/F and can be set to H."""
    target_name = 'topmostSubform[0].Page1[0].Gender[0]'
    # Everything stays inside the ``with`` so the file is still open if the
    # PDF object reads its content lazily.
    with open('tests/data/cerfa_14011-02.pdf', 'rb') as fd:
        pdf = PDF(content=fd)
        matches = [field for field in pdf.page(0).fields if field.name == target_name]
        assert len(matches) == 1
        widget = matches[0]
        assert widget.radio_possible_values == ['H', 'F']
        widget.set('H')
        assert widget.value == 'H'