I've written a Python script to handle email archives. The script scans my Maildir directories and moves messages that are older than a directory-dependent age into a compressed archive. I will run it from cron.
#!/usr/bin/python
#
# Walks through Maildir store, compressing and deleting old email messages.
#
import bz2
import email
import email.utils
import os
import time
#
# Root of the Maildir store that is scanned. The compressed archives
# are written directly into this directory as well.
#
strRootMailDir = '/home/pcw/Maildir'
#
# Mail folders that may be pruned, mapped to the maximum age (in weeks)
# a message may reach before it is archived and removed. Folders that
# are not listed here are never touched.
#
oPrunable = dict([
    ('.IT.Auto.ISA Server Alert', 2),
    ('.IT.Auto.Message Deleted', 2),
    ('.IT.Auto.Sophos Alert', 2),
    ('.IT.Auto.Sweep Report', 2),
    ('.IT.Auto.Virus', 2),
    ('.INBOX', 12),  # mail may sit in the inbox for about 3 months
    ('.Sent', 4),
])
#
# Callback from os.walk to prune a particular directory.
#
def ProcessDir(strArg, strDir, strNames):
    """Archive and delete the expired messages of one Maildir 'cur' directory.

    Written as a directory-walk callback: it is called once per directory
    and returns immediately unless strDir is the 'cur' subdirectory of a
    folder listed in oPrunable.

    Every regular file older than the folder's maximum age is appended, as
    its own bz2 stream in mbox format, to
    '<strRootMailDir>/Archive.<folder>.<MonYYYY>.bz2' and then removed.
    The file is only removed after the archive has been written and closed.

    strArg   -- user argument supplied by the walker; not used.
    strDir   -- path of the directory being visited.
    strNames -- names of the entries inside strDir.
    """
    #
    # Only the 'cur' subdirectory of a mail folder is pruned
    # ('new' and 'tmp' are left alone).
    #
    strFolderPath, strTailDir = os.path.split(strDir)
    if strTailDir != 'cur':
        return
    #
    # The parent directory name is the Maildir folder name; it must be
    # one of the folders that are subject to pruning.
    #
    strMailDir = os.path.basename(strFolderPath)
    if strMailDir not in oPrunable:
        return
    #
    # Anything last changed before this moment is old enough to archive.
    #
    nCutoff = time.time() - 3600 * 24 * 7 * oPrunable[strMailDir]
    for strFile in strNames:
        strPath = os.path.join(strDir, strFile)
        #
        # The walker may list subdirectories too; only real files
        # can be messages.
        #
        if not os.path.isfile(strPath):
            continue
        #
        # Use the file's time stamp rather than the Date header, as the
        # sent time is untrustworthy.
        # NOTE(review): on Unix st_ctime is the last inode change (e.g. a
        # rename when flags change), not the creation time - confirm that
        # this is the intended notion of "age".
        #
        nFileTime = os.stat(strPath).st_ctime
        if nFileTime >= nCutoff:
            continue
        #
        # Read the message once, in binary, and close the file promptly.
        #
        with open(strPath, 'rb') as oMessageFile:
            strRaw = oMessageFile.read()
        strEntry = _BuildMboxEntry(strRaw, nFileTime)
        #
        # One archive file per mail folder, per month.
        #
        strArchiveName = strRootMailDir + '/Archive.%s.%s.bz2' % (
            strMailDir[1:],
            time.strftime('%b%Y', time.localtime(nFileTime)))
        #
        # Append a complete bz2 stream. bz2.compress returns all of the
        # compressed data, so nothing is lost for large messages, and the
        # archive must be opened in binary mode.
        #
        with open(strArchiveName, 'ab') as oArchive:
            oArchive.write(bz2.compress(strEntry))
        #
        # The message is safely archived; delete the original.
        #
        os.unlink(strPath)


def _BuildMboxEntry(strRaw, nFileTime):
    """Return the mbox entry (bytes) for one raw message.

    strRaw    -- complete message exactly as read from the Maildir file.
    nFileTime -- time stamp (seconds since the epoch) used as the
                 delivery date on the 'From ' separator line.
    """
    #
    # message_from_bytes only exists on Python 3; on Python 2 the raw
    # data is a plain str and message_from_string does the same job.
    #
    fnParse = getattr(email, 'message_from_bytes', email.message_from_string)
    oMessage = fnParse(strRaw)
    #
    # The separator line needs a bare address on a single line, so strip
    # any display name or header folding. Fall back to a placeholder when
    # the message has no usable From header.
    #
    strSender = email.utils.parseaddr(str(oMessage['from'] or ''))[1]
    if not strSender:
        strSender = 'MAILER-DAEMON'
    strFromLine = 'From %s %s\n' % (
        strSender,
        time.strftime("%a %b %d %H:%M:%S %Y", time.localtime(nFileTime)))
    if not isinstance(strFromLine, bytes):
        strFromLine = strFromLine.encode('utf-8', 'replace')
    #
    # Quote body lines that start with 'From ' so that an mbox reader
    # does not mistake them for the start of the next message.
    #
    strBody = strRaw.replace(b'\nFrom ', b'\n>From ')
    if not strBody.endswith(b'\n'):
        strBody += b'\n'
    #
    # A blank line separates this entry from the next one.
    #
    return strFromLine + strBody + b'\n'
#
# Visit every directory below the Maildir root and let ProcessDir decide
# whether it needs pruning. os.walk is used because os.path.walk was
# removed in Python 3; only the file names of each directory are passed
# on, so subdirectories are never mistaken for messages.
#
for strWalkDir, oSubDirNames, oFileNames in os.walk(strRootMailDir):
    ProcessDir('', strWalkDir, oFileNames)