# Refresh documentation text cells from their wiki pages.
#
# For every TextCell whose slug is a key of DOCS, download the mapped page,
# parse it with the html5lib parser and store the serialized <body> element
# in the cell's ``text`` field.  Cells whose page cannot be fetched are left
# untouched.
#
# The work happens at module top level (no ``__main__`` guard) so the script
# behaves the same however it is executed.

from bs4 import BeautifulSoup
import html5lib  # noqa: F401 -- not referenced directly; bs4 is asked for the 'html5lib' parser below
import requests

from combo.data.models import TextCell

# Maps a TextCell slug to the URL of the page whose HTML becomes the cell text.
DOCS = {
    'passerelle-connecteur-dev': "https://dev.entrouvert.org/projects/passerelle/wiki/D%C3%A9veloppement_d'un_connecteur.html",
    'passerelle-json-data-store': "https://dev.entrouvert.org/projects/passerelle/wiki/Stockage_de_donn%C3%A9es_JSON.html",
    'combo-json-cell': "https://dev.entrouvert.org/projects/combo/wiki/D%C3%A9veloppement_d'une_cellule_aliment%C3%A9e_par_JSON",
}

# Seconds to wait for the remote server before giving up on one page, so a
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 30

for cell in TextCell.objects.all():
    url = DOCS.get(cell.slug)
    if url is None:
        # This cell is not one of the documentation cells.
        continue

    try:
        resp = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # Same best-effort policy as a non-200 answer: keep the current text.
        continue
    if resp.status_code != 200:
        continue

    document = BeautifulSoup(resp.content, 'html5lib')
    content = document.find('body')
    if content is None:
        # Never overwrite the cell with the string "None".
        continue

    # Tag.decode() renders the element as a text string on both Python 2 and
    # Python 3 (the former ``unicode(content)`` only exists on Python 2).
    # NOTE(review): the original chained ``.replace('', '').replace('', '')``
    # here, which is a no-op and was dropped; it possibly used to strip the
    # enclosing <body> tags -- confirm whether those tags should be kept.
    cell.text = content.decode()
    cell.save()