# Copyright (C) 1998-2006 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

"""Recognizes simple heuristically delimited bounces."""
|
||
|
||
import re
|
||
import email.Iterators
|
||
|
||
|
||
|
||
def _c(pattern):
    """Return `pattern' compiled as a case-insensitive regular expression.

    Small shorthand used to keep the PATTERNS table readable.
    """
    return re.compile(pattern, re.IGNORECASE)
|
||
|
||
# This is a list of tuples of the form
#
#     (start cre, end cre, address cre)
#
# where `cre' means compiled regular expression (compiled case-insensitively
# by _c()), start is the line that introduces the bouncing address block, end
# is the line just after the bouncing address block, and address cre is the
# regexp that will recognize the addresses.  It must have a group called
# `addr' which will contain exactly and only the address that bounced.
#
# Note that process() also tests the start line itself against the address
# cre, which is why some entries use the same regexp for both.  A match whose
# `addr' group is empty or None is ignored by process().
#
# Every regexp containing a backslash is written as a raw string so that the
# backslash reaches the re module untouched.
PATTERNS = [
    # sdm.de
    (_c('here is your list of failed recipients'),
     _c('here is your returned mail'),
     _c(r'<(?P<addr>[^>]*)>')),
    # sz-sb.de, corridor.com, nfg.nl
    (_c('the following addresses had'),
     _c('transcript of session follows'),
     _c(r'<(?P<fulladdr>[^>]*)>|\(expanded from: <?(?P<addr>[^>)]*)>?\)')),
    # robanal.demon.co.uk
    (_c('this message was created automatically by mail delivery software'),
     _c('original message follows'),
     _c(r'rcpt to:\s*<(?P<addr>[^>]*)>')),
    # s1.com (InterScan E-Mail VirusWall NT ???)
    (_c('message from interscan e-mail viruswall nt'),
     _c('end of message'),
     _c(r'rcpt to:\s*<(?P<addr>[^>]*)>')),
    # Smail
    (_c('failed addresses follow:'),
     _c('message text follows:'),
     _c(r'\s*(?P<addr>\S+@\S+)')),
    # newmail.ru
    (_c('This is the machine generated message from mail service.'),
     _c('--- Below the next line is a copy of the message.'),
     _c('<(?P<addr>[^>]*)>')),
    # turbosport.com runs something called `MDaemon 3.5.2' ???
    (_c('The following addresses did NOT receive a copy of your message:'),
     _c('--- Session Transcript ---'),
     _c(r'[>]\s*(?P<addr>.*)$')),
    # usa.net
    (_c(r'Intended recipient:\s*(?P<addr>.*)$'),
     _c('--------RETURNED MAIL FOLLOWS--------'),
     _c(r'Intended recipient:\s*(?P<addr>.*)$')),
    # hotpop.com
    (_c(r'Undeliverable Address:\s*(?P<addr>.*)$'),
     _c('Original message attached'),
     _c(r'Undeliverable Address:\s*(?P<addr>.*)$')),
    # Another demon.co.uk format
    (_c('This message was created automatically by mail delivery'),
     _c('^---- START OF RETURNED MESSAGE ----'),
     _c("addressed to '(?P<addr>[^']*)'")),
    # Prodigy.net full mailbox
    (_c("User's mailbox is full:"),
     _c('Unable to deliver mail.'),
     _c(r"User's mailbox is full:\s*<(?P<addr>[^>]*)>")),
    # Microsoft SMTPSVC
    (_c('The email below could not be delivered to the following user:'),
     _c('Old message:'),
     _c('<(?P<addr>[^>]*)>')),
    # Yahoo on behalf of other domains like sbcglobal.net
    (_c(r'Unable to deliver message to the following address\(es\)\.'),
     _c(r'--- Original message follows\.'),
     _c('<(?P<addr>[^>]*)>:')),
    # kundenserver.de
    (_c('A message that you sent could not be delivered'),
     _c('^---'),
     _c('<(?P<addr>[^>]*)>')),
    # another kundenserver.de
    (_c('A message that you sent could not be delivered'),
     _c('^---'),
     _c(r'^(?P<addr>[^\s@]+@[^\s@:]+):')),
    # thehartford.com
    (_c('Delivery to the following recipients failed'),
     # The end regexp is a placeholder string.
     _c("Bogus - there actually isn't anything"),
     _c(r'^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # and another thehartford.com/hartfordlife.com
    (_c(r'^Your message\s*$'),
     _c('^because:'),
     _c(r'^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # kviv.be (NTMail)
    (_c('^Unable to deliver message to'),
     _c(r'\*+\s+End of message\s+\*+'),
     _c('<(?P<addr>[^>]*)>')),
    # earthlink.net supported domains
    (_c('^Sorry, unable to deliver your message to'),
     _c('^A copy of the original message'),
     _c(r'\s*(?P<addr>[^\s@]+@[^\s@]+)\s+')),
    # ademe.fr
    (_c('^A message could not be delivered to:'),
     _c('^Subject:'),
     _c(r'^\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # andrew.ac.jp
    (_c('^Invalid final delivery userid:'),
     _c('^Original message follows.'),
     _c(r'\s*(?P<addr>[^\s@]+@[^\s@]+)\s*$')),
    # E500_SMTP_Mail_Service@lerctr.org
    (_c('------ Failed Recipients ------'),
     _c('-------- Returned Mail --------'),
     _c('<(?P<addr>[^>]*)>')),
    # cynergycom.net
    (_c('A message that you sent could not be delivered'),
     _c('^---'),
     _c(r'(?P<addr>[^\s@]+@[^\s@)]+)')),
    # Next one goes here...
    ]
|
||
|
||
|
||
|
||
def process(msg, patterns=None):
    """Return the list of bouncing addresses recognized in `msg'.

    msg is handed to the email package's body_line_iterator(), so it is
    presumably an email Message object (confirm against the callers).

    patterns is an iterable of (start cre, end cre, address cre) tuples of
    compiled regular expressions; when it is None the module-level PATTERNS
    table is used.  Each address cre must define a group named `addr'.

    The pattern sets are tried in order and the first one that yields at
    least one address wins.  The result is a list of unique addresses, empty
    when nothing was recognized.
    """
    # The lowercase module name is the only spelling available on Python 3;
    # fall back to the legacy capitalised name for old interpreters.
    try:
        from email.iterators import body_line_iterator
    except ImportError:
        from email.Iterators import body_line_iterator
    if patterns is None:
        patterns = PATTERNS
    # The body is the same for every pattern set, so walk and split the
    # message once rather than once per pattern set.
    lines = list(body_line_iterator(msg))
    # Dict used as a set so that every address is reported only once.
    addrs = {}
    # MAS: This is a mess.  The outer loop used to be over the message
    # so we only looped through the message once.  Looping through the
    # lines for each set of patterns is obviously more work, but if we
    # don't do it, problems arise because scre from the wrong pattern set
    # matches first and then acre doesn't match.  The alternative is to
    # split things into separate modules, but then we process the message
    # multiple times anyway.
    for scre, ecre, acre in patterns:
        # simple state machine
        #     0 = nothing seen yet
        #     1 = intro seen
        state = 0
        for line in lines:
            if state == 0 and scre.search(line):
                state = 1
            # Deliberately not an `elif': the intro line itself is also
            # checked for an address.
            if state == 1:
                mo = acre.search(line)
                if mo:
                    addr = mo.group('addr')
                    # The group is None/empty when the regexp matched
                    # through an alternative that does not set `addr'.
                    if addr:
                        addrs[addr] = 1
                elif ecre.search(line):
                    # End of the address block for this pattern set.
                    break
        if addrs:
            break
    return list(addrs)
|