A friend asked me if I could help him write a Python script for fetching and processing data from emails in his mailbox … Well, the thing with emails is that they’re a pain to work with (in any form). So, I tried to help him out with a little scaffolding (also available as a Gist).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
#
# Author: Riyad Preukschas <riyad@informatik.uni-bremen.de>
# License: Mozilla Public License 2.0
#
# Scaffolding for fetching and parsing emails from IMAP
import email.parser
import email.policy
import imaplib
import sys

# Fill these in before running the script.
IMAP_HOST = ''
IMAP_USER = ''
IMAP_PASSWORD = ''

# Placeholders printed when a message lacks the corresponding piece.
NO_SENDER = '(unknown sender)'
NO_SUBJECT = '(no subject)'
NO_PLAIN_TEXT = '(no plain-text body)'


def parse_message(raw_bytes):
    """Parse raw RFC 822 bytes into an ``email.message.EmailMessage``.

    ``email.policy.default`` is what makes the parser produce the modern
    ``EmailMessage`` API (``get_body()``, ``get_content()``).
    See https://docs.python.org/3/library/email.message.html#email.message.EmailMessage
    """
    parser = email.parser.BytesParser(policy=email.policy.default)
    return parser.parsebytes(raw_bytes)


def plain_text_body(msg):
    """Return the ``text/plain`` body of *msg*, or ``None`` if there is none.

    HTML-only mails have no plain-text part; ``get_body()`` returns ``None``
    for them, so that case is reported to the caller instead of crashing.
    """
    # preferencelist must be a sequence of subtypes; a bare 'plain' string
    # would only work by accident (substring matching).
    body = msg.get_body(preferencelist=('plain',))
    if body is None:
        return None
    return body.get_content()


def summarize_message(msg):
    """Return ``(sender, subject, text)`` as plain strings for *msg*.

    Missing ``From``/``Subject`` headers and a missing plain-text body are
    replaced by placeholder strings so the result can always be printed.
    """
    sender = msg['From']
    subject = msg['Subject']
    text = plain_text_body(msg)
    return (
        NO_SENDER if sender is None else str(sender),
        NO_SUBJECT if subject is None else str(subject),
        NO_PLAIN_TEXT if text is None else text,
    )


def extract_rfc822(fetch_data):
    """Return the raw message bytes from an ``IMAP4.fetch`` data list.

    The message is delivered as an ``(envelope, payload)`` tuple; other items
    in the list may be plain bytes or ``None`` (for example when the message
    no longer exists). Returns ``None`` when no such tuple is present.
    """
    for item in fetch_data or ():
        if isinstance(item, tuple) and len(item) >= 2:
            return item[1]
    return None


def main():
    """Fetch every mail in INBOX and print sender, subject and text body."""
    # connect to server
    with imaplib.IMAP4_SSL(IMAP_HOST) as imap:
        # login with user name and password
        imap.login(IMAP_USER, IMAP_PASSWORD)

        # select mail "folder" to work in (read-only: nothing is marked seen)
        imap.select('INBOX', readonly=True)

        # list ALL mails in folder ... can also be used to filter mails
        # see https://datatracker.ietf.org/doc/html/rfc3501#section-6.4.4
        typ, data = imap.search(None, 'ALL')
        if typ != 'OK':
            # on a 'NO' reply, data holds the server's error text, not ids
            raise imaplib.IMAP4.error('SEARCH failed: ' + repr(data))
        message_ids = data[0].split() if data and data[0] else []

        for message_id in message_ids:
            # fetch "raw" email data
            typ, data = imap.fetch(message_id, '(RFC822)')
            raw_message = extract_rfc822(data) if typ == 'OK' else None
            if raw_message is None:
                # e.g. the message vanished between SEARCH and FETCH
                print('Skipping message ' + message_id.decode()
                      + ': no RFC822 data returned', file=sys.stderr)
                continue

            # create an EmailMessage object from the raw data
            msg = parse_message(raw_message)

            # do your stuff here ...
            email_from, email_subject, email_text = summarize_message(msg)

            print('From : ' + email_from + '\n')
            print('Subject : ' + email_subject + '\n')
            print(email_text)


if __name__ == '__main__':
    main()