247 lines
7.9 KiB

#!/usr/bin/env python3
"""Classes to parse mailer-daemon messages."""
import calendar
import email.message
import re
import os
import sys
class Unparseable(Exception):
pass
class ErrorMessage(email.message.Message):
def __init__(self):
email.message.Message.__init__(self)
self.sub = ''
def is_warning(self):
sub = self.get('Subject')
if not sub:
return 0
sub = sub.lower()
if sub.startswith('waiting mail'):
return 1
if 'warning' in sub:
return 1
self.sub = sub
return 0
def get_errors(self):
for p in EMPARSERS:
self.rewindbody()
try:
return p(self.fp, self.sub)
except Unparseable:
pass
raise Unparseable
# List of re's or tuples of re's.
# If a re, it should contain at least a group (?P<email>...) which
# should refer to the email address. The re can also contain a group
# (?P<reason>...) which should refer to the reason (error message).
# If no reason is present, the emparse_list_reason list is used to
# find a reason.
# If a tuple, the tuple should contain 2 re's. The first re finds a
# location, the second re is repeated one or more times to find
# multiple email addresses. The second re is matched (not searched)
# where the previous match ended.
# The re's are compiled using the re module.
emparse_list_list = [
'error: (?P<reason>unresolvable): (?P<email>.+)',
('----- The following addresses had permanent fatal errors -----\n',
'(?P<email>[^ \n].*)\n( .*\n)?'),
'remote execution.*\n.*rmail (?P<email>.+)',
('The following recipients did not receive your message:\n\n',
' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
'------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
'^<(?P<email>.*)>:\n(?P<reason>.*)',
'^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
'^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
'^Original-Recipient: rfc822;(?P<email>.*)',
'^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
'^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
'^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
'^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
'^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
]
# compile the re's in the list and store them in-place.
for i in range(len(emparse_list_list)):
x = emparse_list_list[i]
if type(x) is type(''):
x = re.compile(x, re.MULTILINE)
else:
xl = []
for x in x:
xl.append(re.compile(x, re.MULTILINE))
x = tuple(xl)
del xl
emparse_list_list[i] = x
del x
del i
# list of re's used to find reasons (error messages).
# if a string, "<>" is replaced by a copy of the email address.
# The expressions are searched for in order. After the first match,
# no more expressions are searched for. So, order is important.
emparse_list_reason = [
r'^5\d{2} <>\.\.\. (?P<reason>.*)',
r'<>\.\.\. (?P<reason>.*)',
re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
]
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
data = fp.read()
res = emparse_list_from.search(data)
if res is None:
from_index = len(data)
else:
from_index = res.start(0)
errors = []
emails = []
reason = None
for regexp in emparse_list_list:
if type(regexp) is type(()):
res = regexp[0].search(data, 0, from_index)
if res is not None:
try:
reason = res.group('reason')
except IndexError:
pass
while 1:
res = regexp[1].match(data, res.end(0), from_index)
if res is None:
break
emails.append(res.group('email'))
break
else:
res = regexp.search(data, 0, from_index)
if res is not None:
emails.append(res.group('email'))
try:
reason = res.group('reason')
except IndexError:
pass
break
if not emails:
raise Unparseable
if not reason:
reason = sub
if reason[:15] == 'returned mail: ':
reason = reason[15:]
for regexp in emparse_list_reason:
if type(regexp) is type(''):
for i in range(len(emails)-1,-1,-1):
email = emails[i]
exp = re.compile(re.escape(email).join(regexp.split('<>')), re.MULTILINE)
res = exp.search(data)
if res is not None:
errors.append(' '.join((email.strip()+': '+res.group('reason')).split()))
del emails[i]
continue
res = regexp.search(data)
if res is not None:
reason = res.group('reason')
break
for email in emails:
errors.append(' '.join((email.strip()+': '+reason).split()))
return errors
EMPARSERS = [emparse_list]
def sort_numeric(a, b):
a = int(a)
b = int(b)
if a < b:
return -1
elif a > b:
return 1
else:
return 0
def parsedir(dir, modify):
os.chdir(dir)
pat = re.compile('^[0-9]*$')
errordict = {}
errorfirst = {}
errorlast = {}
nok = nwarn = nbad = 0
# find all numeric file names and sort them
files = list(filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.')))
files.sort(sort_numeric)
for fn in files:
# Lets try to parse the file.
fp = open(fn)
m = email.message_from_file(fp, _class=ErrorMessage)
sender = m.getaddr('From')
print('%s\t%-40s\t'%(fn, sender[1]), end=' ')
if m.is_warning():
fp.close()
print('warning only')
nwarn = nwarn + 1
if modify:
os.rename(fn, ','+fn)
## os.unlink(fn)
continue
try:
errors = m.get_errors()
except Unparseable:
print('** Not parseable')
nbad = nbad + 1
fp.close()
continue
print(len(errors), 'errors')
# Remember them
for e in errors:
try:
mm, dd = m.getdate('date')[1:1+2]
date = '%s %02d' % (calendar.month_abbr[mm], dd)
except:
date = '??????'
if e not in errordict:
errordict[e] = 1
errorfirst[e] = '%s (%s)' % (fn, date)
else:
errordict[e] = errordict[e] + 1
errorlast[e] = '%s (%s)' % (fn, date)
fp.close()
nok = nok + 1
if modify:
os.rename(fn, ','+fn)
## os.unlink(fn)
print('--------------')
print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
print(nbad,'files unparseable')
print('--------------')
list = []
for e in errordict.keys():
list.append((errordict[e], errorfirst[e], errorlast[e], e))
list.sort()
for num, first, last, e in list:
print('%d %s - %s\t%s' % (num, first, last, e))
def main():
modify = 0
if len(sys.argv) > 1 and sys.argv[1] == '-d':
modify = 1
del sys.argv[1]
if len(sys.argv) > 1:
for folder in sys.argv[1:]:
parsedir(folder, modify)
else:
parsedir('/ufs/jack/Mail/errorsinbox', modify)
if __name__ == '__main__' or sys.argv[0] == __name__:
main()