# -*- coding: utf-8 -*-
'''
    cv2parser - Carte Vitale 2 XML file parser

    Copyright (C) 2014  Entr'ouvert

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
# Provenance: cv2parser/cv2parser.py as introduced by patch
# 1374d3aa12715e29c5fdf4bf49ccf05023635171 ("Add very first parser."),
# Mikaël Ates, Fri, 19 Dec 2014 16:55:56 +0100.  The same patch also
# creates an empty cv2parser/__init__.py.
import re
import os

from datetime import datetime

try:
    from lxml import etree
    _HAVE_LXML = True
except ImportError:
    # lxml stays the preferred backend; the standard library offers the same
    # parse()/find()/findall()/.text API, which is all the parser relies on.
    from xml.etree import ElementTree as etree
    _HAVE_LXML = False


# Compiled once: camel_to_pep8() runs for every field of every beneficiary.
_CAMEL_WORD_RE = re.compile(r'(.)([A-Z][a-z]+)')
_CAMEL_BOUNDARY_RE = re.compile(r'([a-z0-9])([A-Z])')


def get_xml_child_element_text(element, child_name):
    '''Return the text of the first ``child_name`` child of ``element``.

    Returns None when ``element`` is None, when it has no such child, or
    when that child carries no text.
    '''
    if element is None:
        return None
    child = element.find(child_name)
    # Compare with None explicitly: the truth value of an XML element
    # reflects its number of children, not whether it was found.
    if child is None:
        return None
    return child.text


def camel_to_pep8(name):
    '''Convert a camelCase name to snake_case (``codeSTS`` -> ``code_sts``).'''
    partial = _CAMEL_WORD_RE.sub(r'\1_\2', name)
    return _CAMEL_BOUNDARY_RE.sub(r'\1_\2', partial).lower()


def parse_date(str_date):
    '''Parse a ``DDMMYYYY`` string into a datetime.

    Raises ValueError when ``str_date`` does not match that format.
    '''
    fmt = '%d%m%Y'
    return datetime.strptime(str_date, fmt)


def parse_file_date(str_date):
    '''Parse a ``YYYY-DD-MM-HH-MM-SS`` string into a datetime.

    Raises ValueError when ``str_date`` does not match that format.
    '''
    # NOTE(review): the day comes before the month here (%Y-%d-%m); confirm
    # this really is the layout of the timestamps this function receives.
    fmt = '%Y-%d-%m-%H-%M-%S'
    return datetime.strptime(str_date, fmt)


def _find_nested(element, *path):
    '''Follow ``path`` (child names) down from ``element``.

    Returns the element reached, or None as soon as one step is missing.
    '''
    for child_name in path:
        if element is None:
            return None
        element = element.find(child_name)
    return element


def _find_nested_text(element, *path):
    '''Return the text of the element reached by ``path``, or None.'''
    node = _find_nested(element, *path)
    if node is None:
        return None
    return node.text


def _zero_pad(value, width):
    '''Left-pad ``value`` with zeros up to ``width``; None stays None.'''
    if value is None:
        return None
    return value.rjust(width, '0')


def _lower_after_first(value):
    '''Lower-case everything but the first character; None stays None.'''
    if value is None:
        return None
    return value[:1] + value[1:].lower()


# For each direct child of a beneficiary element, the grand-children whose
# text is copied onto the Beneficiaire as ``<child>_<snake_case_name>``.
ELEMENT_NAMES = {
    'ident':
        [
            'nomUsuel',
            'nomPatronymique',
            'prenomUsuel',
            'naissance',
            'nir',
            'adresse',
            'rangDeNaissance',
            'nirCertifie',
            'dateCertification'
        ],
    'amo':
        [
            'qualBenef',
            'codeRegime',
            'caisse',
            'centreGestion',
            'codeGestion',
            'libelleExo',
            'infoCompl',
            'centreCarte',
            'medecinTraitant'
        ],
    'mutuelle':
        [
            'numIdent',
            'indicTraitement',
            'codeSTS',
            'donneesCompl'
        ],
    'amc':
        [
            'numComplB2',
            'numComplEDI',
            'numAdherent',
            'indicTraitement',
            'codeRoutageFlux',
            'identHote',
            'nomDomaine',
            'codeSTS',
            'donneesCompl'
        ],
    'cmu':
        [
            'typeCMU'
        ],
    'listeAt':
        [],
    'e112':
        [
            # NOTE(review): 'fomulaire' looks like a typo for 'formulaire';
            # kept as is because it determines the public attribute name
            # ``e112_fomulaire``.  Verify against the XML schema.
            'fomulaire',
            'typeExp',
            'article',
            'activite',
            'date'
        ],
    }


class Beneficiaire(object):
    '''One beneficiary read from an ``element`` node of ``listeBenef``.

    Raw attributes (always strings or None) are named
    ``<root>_<field>`` after ELEMENT_NAMES, e.g. ``ident_nom_usuel`` or
    ``amo_code_regime``.  Entitlement periods are exposed as
    ``amo_liste_periodes_droits_<n>_debut`` / ``_fin`` with n starting at 1.

    Derived attributes: ``assure``, ``caisse``, ``code_regime``,
    ``centre_carte``, ``naissance``, ``date_certification``, ``nir``,
    ``rang_de_naissance``, ``nom``, ``prenom`` and ``gender``.  A derived
    attribute is None whenever the raw value it is computed from is absent.
    '''

    def __init__(self, element):
        '''Populate the beneficiary from its XML ``element``.

        Raises ValueError when a date or the birth rank is present but
        malformed.
        '''
        for root_name, names in ELEMENT_NAMES.items():
            root_element = element.find(root_name)
            for name in names:
                setattr(self, root_name + '_' + camel_to_pep8(name),
                        get_xml_child_element_text(root_element, name))

        # qualBenef '0' identifies the insured person.
        self.assure = self.amo_qual_benef == '0'

        # 'naissance' and 'adresse' are containers: the generic loop above
        # only saw their own text, so read their children explicitly.
        self.ident_naissance = _find_nested_text(
            element, 'ident', 'naissance', 'date')
        adresse_element = _find_nested(element, 'ident', 'adresse')
        adresse_lines = []
        for i in range(1, 6):
            ligne = get_xml_child_element_text(adresse_element,
                                               'ligne' + str(i))
            if ligne is not None:
                adresse_lines.append(ligne + '\n')
        self.ident_adresse = ''.join(adresse_lines)

        self._read_periodes_droits(element)

        service_element = _find_nested(element, 'amo', 'service')
        self.amo_service_code_service = get_xml_child_element_text(
            service_element, 'codeService')
        self.amo_service_periode_service_debut = _find_nested_text(
            service_element, 'periodeService', 'debut')
        self.amo_service_periode_service_fin = _find_nested_text(
            service_element, 'periodeService', 'fin')

        self.caisse = _zero_pad(self.amo_caisse, 3)
        self.code_regime = _zero_pad(self.amo_code_regime, 2)
        self.centre_carte = _zero_pad(self.amo_centre_carte, 4)

        self.naissance = None
        if self.ident_naissance:
            self.naissance = parse_date(self.ident_naissance)
        self.date_certification = None
        if self.ident_date_certification:
            self.date_certification = \
                parse_date(self.ident_date_certification)

        # Only the first 13 characters of the raw value are kept.
        self.nir = None
        if self.ident_nir is not None:
            self.nir = self.ident_nir[0:13]
        self.rang_de_naissance = None
        if self.ident_rang_de_naissance is not None:
            self.rang_de_naissance = int(self.ident_rang_de_naissance)
        self.nom = _lower_after_first(self.ident_nom_usuel)
        self.prenom = _lower_after_first(self.ident_prenom_usuel)

        # The first NIR digit is used as the gender when it is 1 or 2.
        self.gender = None
        if self.nir and self.nir[0] in ('1', '2'):
            self.gender = int(self.nir[0])

    def _read_periodes_droits(self, element):
        '''Store every complete amo/listePeriodesDroits entry as attributes.'''
        periodes = _find_nested(element, 'amo', 'listePeriodesDroits')
        if periodes is None:
            return
        index = 0
        for periode_element in periodes.findall('element'):
            debut = periode_element.find('debut')
            fin = periode_element.find('fin')
            if debut is None or fin is None:
                # An entry missing one bound is skipped as a whole, so a
                # period is never exposed half-filled.
                continue
            index += 1
            prefix = 'amo_liste_periodes_droits_' + str(index)
            setattr(self, prefix + '_debut', debut.text)
            setattr(self, prefix + '_fin', fin.text)

    def __unicode__(self):
        '''Return a multi-line ``name: value`` dump of the non-empty fields.'''
        values = ""
        if self.assure:
            values += 'Assure\n'
        else:
            values += 'Non assure\n'
        for root_name, names in ELEMENT_NAMES.items():
            for name in names:
                attribute = root_name + '_' + camel_to_pep8(name)
                value = getattr(self, attribute)
                if value:
                    values += attribute + ': ' + value + '\n'
            if root_name != 'amo':
                continue
            # Walk every stored period, however many __init__ recorded.
            index = 1
            while hasattr(self, 'amo_liste_periodes_droits_' +
                          str(index) + '_debut'):
                for bound in ('_debut', '_fin'):
                    attribute = 'amo_liste_periodes_droits_' + \
                        str(index) + bound
                    value = getattr(self, attribute, None)
                    if value:
                        values += attribute + ': ' + value + '\n'
                index += 1
            for attribute in ('amo_service_code_service',
                              'amo_service_periode_service_debut',
                              'amo_service_periode_service_fin'):
                value = getattr(self, attribute, None)
                if value:
                    values += attribute + ': ' + value + '\n'
        return values


class CarteVitale(object):
    '''A parsed Carte Vitale XML file.

    Attributes: ``doc`` (the parsed tree), ``beneficiaires`` (list of
    Beneficiaire, each with an ``id`` equal to its position in the file),
    ``num_serie``, ``fin_validite`` and ``acces_compl`` (the latter two are
    None when absent from the ``tech`` element).
    '''

    def __init__(self, filename):
        '''Parse ``filename`` (anything ``etree.parse`` accepts).

        Raises AttributeError when the document has no ``listeBenef``
        element or no ``tech/numSerie`` element.
        '''
        self.doc = etree.parse(filename)  # t_AsnDonneesVitale
        self.beneficiaires = self._read_beneficiaires()
        tech_element = self.doc.find('tech')
        self.fin_validite = get_xml_child_element_text(tech_element,
                                                       'finValidite')
        # Deliberately strict, unlike the two optional fields around it.
        self.num_serie = tech_element.find('numSerie').text
        self.acces_compl = get_xml_child_element_text(tech_element,
                                                      'accesCompl')

    def _read_beneficiaires(self):
        '''Build one Beneficiaire per ``listeBenef/element`` node.'''
        liste_benef_element = self.doc.find('listeBenef')
        beneficiaires = []
        for i, beneficiaire_element in enumerate(
                liste_benef_element.findall('element')):
            beneficiaire = Beneficiaire(beneficiaire_element)
            beneficiaire.id = i
            beneficiaires.append(beneficiaire)
        return beneficiaires

    def get_beneficiaire(self, identifier):
        '''Return the beneficiary whose ``id`` is ``identifier``, or None.'''
        for beneficiaire in self.beneficiaires:
            if beneficiaire.id == identifier:
                return beneficiaire
        return None

    def pretty_print_xml(self):
        '''Print the parsed document as XML.'''
        if _HAVE_LXML:
            xml = etree.tostring(self.doc, pretty_print=True)
        else:
            # The standard library serializer works on the root element and
            # has no pretty_print option, so the output is not re-indented.
            xml = etree.tostring(self.doc.getroot(), encoding='utf-8')
        print(xml.decode('utf-8'))

    def print_carte(self):
        '''Print the card-level fields, then every beneficiary.'''
        values = "Numéro de série : %s\n" % self.num_serie
        if self.fin_validite:
            values += 'Fin validité : %s\n' % self.fin_validite
        if self.acces_compl:
            values += 'Accès complémentaire : %s\n' % self.acces_compl
        print(values)
        for beneficiaire in self.beneficiaires:
            print(beneficiaire.__unicode__())


def _main():
    '''Dump every file found in ../xml_test_files.'''
    import glob
    for fn in glob.glob('../xml_test_files/*'):
        if not os.path.isdir(fn):
            cv = CarteVitale(fn)
            cv.pretty_print_xml()
            cv.print_carte()


if __name__ == '__main__':
    _main()