tabularfile/tests/test_ods.py

# tabularfile - simple ods reader and writer
# Copyright (C) 2020 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import datetime
import io
import resource

from tabularfile import load, write, TabularFileError
from tabularfile.ods import LinkedValue

import pytest


def test_test1_ods():
    """Load both sheets of test1.ods without typing and compare raw cell text."""
    path = 'tests/data/test1.ods'
    sheet_names = ['Feuille1', 'Feuille2']
    expected_second_sheet = [
        ['123'],
        ['20/06/20', '', '', '', 'efef'],
        ['20/06/20', '', '', '', '123'],
        ['je suis content'] * 6,
        [],
        ['', '', '', 'https://www.entrouvert.com/'],
        [],
        [],
        [],
        ['', '', '', '', '1312'],
    ]

    # First sheet: a single empty row is expected.
    with load(path, sheet=0) as first_sheet:
        content = list(first_sheet)
        assert content == [[]]
        assert first_sheet.sheets == sheet_names

    # Second sheet: all values are compared as strings (no typed=True here).
    with load(path, sheet=1) as second_sheet:
        content = list(second_sheet)
        assert content == expected_second_sheet
        assert second_sheet.sheets == sheet_names


def test_test1_ods_typed():
    """Load the second sheet of test1.ods with typed=True and check converted values."""
    june_20 = datetime.date(2020, 6, 20)
    expected_rows = [
        [123],
        [june_20, '', '', '', 'efef'],
        [june_20, '', '', '', 123],
        ['je suis content'] * 6,
        [],
        ['', '', '', 'https://www.entrouvert.com/'],
        [],
        [],
        [],
        ['', '', '', '', 1312],
    ]

    with load('tests/data/test1.ods', sheet=1, typed=True) as tabfile:
        typed_rows = list(tabfile)
        assert typed_rows == expected_rows


# Rows fed to the writer in test_writer and expected back unchanged when the
# produced document is re-read with typed=True.
ROWS = [
    [123],
    [datetime.date(2020, 6, 20), '', '', '', 'efef'],
    [datetime.date(2020, 6, 20), '', '', '', 123],
    ['je suis content'] * 6,
    # Five consecutive empty rows, each one a distinct list object.
    *([] for _ in range(5)),
    ['', '', '', '', 1312],
]


def test_test2_with_span():
    """Iterating the second sheet of test2_with_span.ods must raise TabularFileError."""
    source = 'tests/data/test2_with_span.ods'
    with load(source, sheet=1, typed=True) as tabfile:
        # Sheet names are checked before any row is consumed.
        names = tabfile.sheets
        assert names == ['Feuille1', 'Feuille2']

        expect_failure = pytest.raises(TabularFileError, match='fusioned cells are unsupported')
        with expect_failure:
            list(tabfile)


def test_writer():
    """Round-trip ROWS through write() and load(typed=True) using an in-memory buffer."""
    buffer = io.BytesIO()
    with write(buffer) as writer:
        writer.writerows(ROWS)

    # The buffer content is only read once the writer context has exited.
    document = buffer.getvalue()
    with load(document, typed=True) as tabfile:
        reread = list(tabfile)
        assert reread == ROWS


def test_writer_cell_writer():
    """Write rows cell by cell, some with an href, and read them back with xlink=True."""
    # One inner list per row; each cell is (value, keyword arguments for write_cell).
    cells_by_row = [
        [('date', {}), ('count', {})],
        [
            (datetime.date(2019, 12, 1), {'href': 'https://example.com/summary/2020/12/01/'}),
            (123, {'href': 'http://example.com'}),
        ],
        [(datetime.date(2020, 12, 1), {}), (156, {})],
    ]

    buffer = io.BytesIO()
    with write(buffer) as writer:
        for row_cells in cells_by_row:
            # A new cell_writer context is entered for every row.
            with writer.cell_writer as write_cell:
                for value, options in row_cells:
                    write_cell(value, **options)

    with load(buffer.getvalue(), typed=True, xlink=True) as tabfile:
        rows = list(tabfile)

    # Cells written with an href are expected as LinkedValue; integers come back as floats.
    header, linked, plain = rows
    assert header == ['date', 'count']
    assert linked == [
        LinkedValue(datetime.date(2019, 12, 1), 'https://example.com/summary/2020/12/01/'),
        LinkedValue(123.0, 'http://example.com'),
    ]
    assert plain == [datetime.date(2020, 12, 1), 156.0]


def test_massive_write(tmp_path):
    """Write 100000 rows and check the thread's peak RSS grows by less than 1000 units."""

    def peak_rss():
        # ru_maxrss as reported by getrusage() for the calling thread.
        return resource.getrusage(resource.RUSAGE_THREAD).ru_maxrss

    baseline = peak_rss()
    target = tmp_path / 'massive.ods'
    with target.open('wb') as fh, write(fh) as writer:
        # Rows are produced lazily by a generator rather than built up front.
        writer.writerows([1, 2, 3, 4] for _ in range(100000))
        # Measured while the writer is still open, i.e. before it is finalized.
        growth = peak_rss() - baseline
        assert growth < 1000


def test_massive_read(tmp_path):
    """Read back 100000 rows and check peak RSS stays bounded on every row."""

    def peak_rss():
        # ru_maxrss as reported by getrusage() for the calling thread.
        return resource.getrusage(resource.RUSAGE_THREAD).ru_maxrss

    target = tmp_path / 'massive.ods'

    # Produce the large document first.
    with target.open('wb') as fh, write(fh) as writer:
        writer.writerows([1, 2, 3, 4] for _ in range(100000))

    with target.open('rb') as fh, load(fh) as tabfile:
        # Baseline is taken once the file is opened by load(), before iterating.
        baseline = peak_rss()
        for i, _row in enumerate(tabfile):
            # Peak RSS must grow by less than 4000 units while parsing 100000 lines.
            # NOTE(review): ru_maxrss is in kilobytes on Linux, so this budget is
            # about 4 MB rather than 4 KB - confirm the intended limit.
            assert peak_rss() - baseline < 4000, 'row %s' % i