#!/usr/bin/env python3
# dmarcpipe.py - DMARC report aggregator.
# Copyright (C) 2016-2017 Tomasz Kramkowski
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see https://www.gnu.org/licenses/.

from dmarc import parse_dmarc
from getopt import gnu_getopt
from gzip import decompress as gz_decompress
from io import BytesIO
from re import compile as re_compile
from sql import store_dmarc
from sys import stdin, stdout, argv
from zipfile import ZipFile
from magic import detect_from_content as magic
import email

# Accepted attachment / archive member names:
#   <field>!<field>!<digits>!<digits>[!<field>].xml | .xml.gz | .zip
# The dots are escaped so that they only match a literal '.'.
re_valid_filename = re_compile(
    r'^[^\s!]+![^\s!]+![0-9]+![0-9]+(![^\s!]+)?\.(xml(\.gz)?|zip)$')

logfile = None
dbfile = 'dmarc.db'


class FalseReportException(Exception):
    """Raised when a message does not carry a usable report attachment."""


def is_valid_filename(f):
    """Return True if *f* matches the expected report filename pattern.

    ``None`` (e.g. an attachment without a filename parameter) is treated
    as an invalid filename rather than raising ``TypeError``.
    """
    return f is not None and re_valid_filename.match(f) is not None


def zip2xml(data):
    """Return the contents of the single, validly named member of a zip.

    Raises FalseReportException if the archive does not contain exactly
    one member or if that member's name fails is_valid_filename().
    """
    with ZipFile(BytesIO(data)) as z:
        n = z.namelist()
        if len(n) != 1 or not is_valid_filename(n[0]):
            raise FalseReportException(
                'zip2xml: broken zip len({})'.format(len(n)))
        with z.open(n[0]) as member:
            return member.read()


def gzip2xml(data):
    """Return the gzip-decompressed form of *data*."""
    return gz_decompress(data)


def octet_stream(data):
    """Decode *data* using the MIME type detected from its content.

    Raises FalseReportException if content detection itself yields
    'application/octet-stream'; dispatching on that type again would
    recurse into this function forever.
    """
    detected = magic(data).mime_type
    if detected == 'application/octet-stream':
        raise FalseReportException(
            'invalid content type: {}'.format(detected))
    return decode(data, detected)


# Maps a MIME type to the callable that turns a payload of that type into
# the raw report bytes.
decoders = {
    'application/gzip': gzip2xml,
    'application/x-gzip': gzip2xml,
    'application/octet-stream': octet_stream,
    'application/x-zip': zip2xml,
    'application/x-zip-compressed': zip2xml,
    'application/zip': zip2xml,
    # NOTE(review): this type is routed through gzip decompression as-is;
    # confirm that payloads detected as application/zlib are gzip-framed.
    'application/zlib': gzip2xml,
    'text/xml': lambda data: data,
}


def decode(data, mime):
    """Decode *data* with the decoder registered for MIME type *mime*.

    Raises FalseReportException if no decoder is registered for *mime*.
    Exceptions raised by the decoder itself propagate unchanged.
    """
    # Only the table lookup is guarded, so that a KeyError raised inside a
    # decoder is not misreported as an unknown content type.
    try:
        decoder = decoders[mime]
    except KeyError:
        raise FalseReportException(
            'invalid content type: {}'.format(mime)) from None
    return decoder(data)


def process_message(m):
    """Find the report attachment in message *m*, parse it and store it.

    The first non-multipart part that has a supported content type, a
    Content-Disposition header and a valid filename is decoded, passed to
    parse_dmarc() and the result handed to store_dmarc() with the
    module-level ``dbfile``.

    Raises FalseReportException if no such part exists.
    """
    inv_fn = False
    for part in m.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get_content_type() not in decoders:
            continue
        if part.get_content_disposition() is None:
            continue
        if not is_valid_filename(part.get_filename()):
            # Remember this so the error message can say why we failed.
            inv_fn = True
            continue
        att = part
        break
    else:
        raise FalseReportException('attachment not found{}'.format(
            ' (invalid filename)' if inv_fn else ''))

    xml = decode(att.get_payload(decode=True), att.get_content_type())
    dmarc = parse_dmarc(xml)
    store_dmarc(dbfile, dmarc)


def main():
    """Read a message from stdin, process it and echo it to stdout.

    Usage: dmarcpipe.py [-l LOGFILE] [DBFILE]

    The message written to stdout carries an added ``X-DMARC-Report``
    header: ``True`` when processing succeeded, otherwise ``False; `` and
    the repr of the exception that stopped processing.
    """
    global logfile, dbfile

    # getopt expects the argument list without the program name.
    opts, args = gnu_getopt(argv[1:], 'l:')
    for opt, value in opts:
        if opt == '-l':
            logfile = value
    if len(args) == 1:
        dbfile = args[0]

    # Parse outside the try block: if parsing fails there is no message to
    # annotate or echo, and the handlers below must be able to rely on m.
    m = email.message_from_file(stdin)
    try:
        process_message(m)
        m['X-DMARC-Report'] = 'True'
    except Exception as e:
        m['X-DMARC-Report'] = 'False; {}'.format(repr(e))
    finally:
        # Always pass the message on, whether or not it was a report.
        stdout.write(m.as_string())


if __name__ == '__main__':
    main()