From 0e46f54108e04a535de5a951d96216dc1aece5ab Mon Sep 17 00:00:00 2001
From: Patrick Lauber <digi@treepy.com>
Date: Mon, 10 Dec 2012 16:58:44 +0100
Subject: [PATCH] setup py: require html5lib, add extract_images hook to clean_html

---
 djangocms_text_ckeditor/html.py | 10 ++++++++++
 requirements.txt                |  1 +
 setup.py                        |  2 +-
 3 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/djangocms_text_ckeditor/html.py b/djangocms_text_ckeditor/html.py
index 1af3abf..9cb04b1 100644
--- a/djangocms_text_ckeditor/html.py
+++ b/djangocms_text_ckeditor/html.py
@@ -1,10 +1,13 @@
 # -*- coding: utf-8 -*-
 from html5lib import sanitizer, serializer, treebuilders, treewalkers
 import html5lib
+import re
 
 DEFAULT_PARSER = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer,
                                      tree=treebuilders.getTreeBuilder("dom"))
 
+RE_IMG = re.compile(r'<img[^>]*\ssrc="(.*?)"', re.IGNORECASE)
+
 def clean_html(data, full=True, parser=DEFAULT_PARSER):
     """
     Cleans HTML from XSS vulnerabilities using html5lib
@@ -12,6 +15,7 @@ def clean_html(data, full=True, parser=DEFAULT_PARSER):
     If full is False, only the contents inside <body> will be returned (without
     the <body> tags).
     """
+    data = extract_images(data)
     if full:
         dom_tree = parser.parse(data)
     else:
@@ -21,3 +25,9 @@ def clean_html(data, full=True, parser=DEFAULT_PARSER):
     s = serializer.htmlserializer.HTMLSerializer(omit_optional_tags=False,
                                                  quote_attr_values=True)
     return u''.join(s.serialize(stream))
+
+def extract_images(data):
+    """Pre-sanitize hook for <img> handling; currently returns data as is."""
+    # clean_html() passes the return value to the parser, so this must be
+    # the HTML string -- not a match object -- and must not print to stdout.
+    return data
diff --git a/requirements.txt b/requirements.txt
index 275b915..8d0670d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 transifex-client
 django-cms>=2.3
+html5lib
diff --git a/setup.py b/setup.py
index 347322b..10b4f95 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ from djangocms_text_ckeditor import __version__
 
 
 INSTALL_REQUIRES = [
-
+    'html5lib',
 ]
 
 CLASSIFIERS = [