Speed up escape sequence handling in readStringFromStream

Changes readStringFromStream to look up escape sequences in a dict rather than walking a long if/elif chain. This should lead to a speed-up, and the code looks cleaner.
2016-08-22 15:23:34 -04:00 · 2016-08-22 15:23:34 -04:00 · d7f5eafddb
parent 4fc7f9d14a
commit d7f5eafddb
1 changed files with 50 additions and 49 deletions
--- a/PyPDF2/generic.py
+++ b/PyPDF2/generic.py
@ -340,55 +340,56 @@ def readStringFromStream(stream):
                break
        elif tok == b_("\\"):
            tok = stream.read(1)
-            if tok == b_("n"):
-                tok = b_("\n")
-            elif tok == b_("r"):
-                tok = b_("\r")
-            elif tok == b_("t"):
-                tok = b_("\t")
-            elif tok == b_("b"):
-                tok = b_("\b")
-            elif tok == b_("f"):
-                tok = b_("\f")
-            elif tok == b_("c"):
-                tok = b_("\c")
-            elif tok == b_("("):
-                tok = b_("(")
-            elif tok == b_(")"):
-                tok = b_(")")
-            elif tok == b_("/"):
-                tok = b_("/")
-            elif tok == b_("\\"):
-                tok = b_("\\")
-            elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), 
-                    b_("]"), b_("#"),  b_("_"), b_("&"), b_('$')):
-                # odd/unnessecary escape sequences we have encountered
-                tok = b_(tok)
-            elif tok.isdigit():
-                # "The number ddd may consist of one, two, or three
-                # octal digits; high-order overflow shall be ignored.
-                # Three octal digits shall be used, with leading zeros
-                # as needed, if the next character of the string is also
-                # a digit." (PDF reference 7.3.4.2, p 16)
-                for i in range(2):
-                    ntok = stream.read(1)
-                    if ntok.isdigit():
-                        tok += ntok
-                    else:
-                        break
-                tok = b_(chr(int(tok, base=8)))
-            elif tok in b_("\n\r"):
-                # This case is  hit when a backslash followed by a line
-                # break occurs.  If it's a multi-char EOL, consume the
-                # second character:
-                tok = stream.read(1)
-                if not tok in b_("\n\r"):
-                    stream.seek(-1, 1)
-                # Then don't add anything to the actual string, since this
-                # line break was escaped:
-                tok = b_('')
-            else:
-                raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
+            ESCAPE_DICT = {b_("n") : b_("\n"),
+                           b_("r") : b_("\r"),
+                           b_("t") : b_("\t"),
+                           b_("b") : b_("\b"),
+                           b_("f") : b_("\f"),
+                           b_("c") : b_("\c"),
+                           b_("(") : b_("("),
+                           b_(")") : b_(")"),
+                           b_("/") : b_("/"),
+                           b_("\\") : b_("\\"),
+                           b_(" ") : b_(" "),
+                           b_("/") : b_("/"),
+                           b_("%") : b_("%"),
+                           b_("<") : b_("<"),
+                           b_(">") : b_(">"),
+                           b_("[") : b_("["), 
+                           b_("]") : b_("]"),
+                           b_("#") : b_("#"),
+                           b_("_") : b_("_"),
+                           b_("&") : b_("&"),
+                           b_('$') : b_('$'),
+                           }
+            try:
+                tok = escape_dict[tok]
+            except KeyError:
+                if tok.isdigit():
+                    # "The number ddd may consist of one, two, or three
+                    # octal digits; high-order overflow shall be ignored.
+                    # Three octal digits shall be used, with leading zeros
+                    # as needed, if the next character of the string is also
+                    # a digit." (PDF reference 7.3.4.2, p 16)
+                    for i in range(2):
+                        ntok = stream.read(1)
+                        if ntok.isdigit():
+                            tok += ntok
+                        else:
+                            break
+                    tok = b_(chr(int(tok, base=8)))
+                elif tok in b_("\n\r"):
+                    # This case is  hit when a backslash followed by a line
+                    # break occurs.  If it's a multi-char EOL, consume the
+                    # second character:
+                    tok = stream.read(1)
+                    if not tok in b_("\n\r"):
+                        stream.seek(-1, 1)
+                    # Then don't add anything to the actual string, since this
+                    # line break was escaped:
+                    tok = b_('')
+                else:
+                    raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok)
        txt += tok
    return createStringObject(txt)