"""Refresh documentation text cells from pages of the dev.entrouvert.org wiki.

For every ``TextCell`` whose slug is a key of ``DOCS``, the matching page is
downloaded and the markup found inside its ``<body>`` element is saved as the
cell text.  Cells with any other slug are left untouched, and so are cells
whose page cannot be fetched (network error or non-200 response).
"""

from bs4 import BeautifulSoup
import html5lib  # not referenced directly; BeautifulSoup is asked for this parser by name
import requests

from combo.data.models import TextCell

# Maps a cell slug to the exported wiki page that provides its content.
# URLs are kept pure ASCII (non-ASCII characters percent-encoded as UTF-8) so
# the file parses regardless of the interpreter's default source encoding.
DOCS = {
    'passerelle-connecteur-dev': "https://dev.entrouvert.org/projects/passerelle/wiki/D%C3%A9veloppement_d'un_connecteur.html",
    'passerelle-json-data-store': "https://dev.entrouvert.org/projects/passerelle/wiki/Stockage_de_donn%C3%A9es_JSON.html",
    'combo-json-cell': "https://dev.entrouvert.org/projects/combo/wiki/D%C3%A9veloppement_d'une_cellule_aliment%C3%A9e_par_JSON.html",
    'developer-installation': "https://dev.entrouvert.org/projects/publik-devinst/wiki/Installation_d'un_environnement_de_d%C3%A9veloppement_local.html",
    'redmine-contribuer-a-publik': "https://dev.entrouvert.org/projects/publik/wiki/Contribuer_%C3%A0_Publik.html",
}

# Seconds to wait for the wiki before giving up on a page; without a timeout
# requests.get() may block forever on an unresponsive server.
REQUEST_TIMEOUT = 30

for cell in TextCell.objects.all():
    url = DOCS.get(cell.slug)
    if url is None:
        # Not one of the documentation cells handled by this script.
        continue

    try:
        resp = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # Same best-effort policy as for a non-200 answer: keep the current
        # text and move on to the next cell instead of aborting the run.
        continue
    if resp.status_code != 200:
        continue

    document = BeautifulSoup(resp.content, 'html5lib')
    body = document.find('body')
    if body is None:
        continue

    # Store the markup *inside* <body>, without the wrapping tag itself.
    # NOTE(review): the previous code called unicode(body) followed by
    # .replace('', '') twice, which is a no-op; those calls were presumably
    # meant to strip the <body> and </body> tags -- confirm.
    # decode_contents() returns text on both Python 2 and Python 3, unlike
    # the Python-2-only unicode() builtin.
    cell.text = body.decode_contents()
    cell.save()