#!/bin/bash
#
# Split an mbox into message files, and sort them into directories according
# to message-id splitting.
#
# Usage: pass the mbox file as the first argument.  A file whose name ends in
# .gz is read through zcat, anything else through cat.
#
# The destination directory is taken from $SPLITMBOX_DEST, which is read after
# sourcing $HOME/etc/archives.conf (presumably that file defines it -- verify).
# The destination must already exist.  Temporary files go under ~/tmp.
#
# This needs to run under bash because some of the ${} stuff is a bash-ism.
#
# $Id$

# A failure to source the config is not fatal here, so SPLITMBOX_DEST may
# also be supplied through the environment.
. "$HOME/etc/archives.conf"

mbox=$1
destdir=$SPLITMBOX_DEST
export destdir
export TMPDIR=~/tmp

# *() and other patterns need this set
shopt -s extglob

# The expansions are quoted on purpose: an unquoted empty value turns
# "[ ! -f ]" into a one-operand test that is false, which would let a missing
# argument (or an unset SPLITMBOX_DEST) slip through these checks.
if [[ ! -f "$mbox" ]]; then
  echo "mbox $mbox not found" >&2
  exit 1
fi

if [[ ! -d "$destdir" ]]; then
  echo "destination directory $destdir does not exist" >&2
  exit 1
fi

# Pick the reader from the file name: "name.gz" is decompressed, anything else
# (including a file called just ".gz") is read as is.
if [[ "${mbox##*/}" == ?*.gz ]]; then
  cat=zcat
else
  cat=cat
fi

# Without this check an mktemp failure would leave $tempdir empty and the
# messages would be written to /msg.N.
tempdir=$(mktemp -d) || {
  echo "failed to create temporary directory" >&2
  exit 1
}
export tempdir

# formail -s runs the command once per message; the child shell reads
# $tempdir and $FILENO from its environment.
"$cat" "$mbox" | formail -s sh -c 'cat - > "$tempdir/msg.$FILENO"'

for message in "$tempdir"/msg.*; do
  # An empty mbox leaves the glob unexpanded; skip the literal pattern.
  [[ -e "$message" ]] || continue

  # Fetch the Message-Id.  Note: some mailers (notably Microsoft ones) emit
  # more than one Message-Id per message, so we loop here.
  # read -r keeps backslashes in the id; the default IFS still trims
  # surrounding whitespace, which the ">" strip below relies on.
  while read -r messageid; do
    # strip the initial " <", the trailing ">", and replace /'s with _
    messageid=${messageid##*([< ])}
    messageid=${messageid%%>}
    messageid=${messageid//\//_}

    # An empty id would make ln target the directory itself and create a
    # stray msg.N link there, so skip it.
    [[ -n "$messageid" ]] || continue

    # fetch the part after the first @ (inclusive), and split in two levels of
    # dirs of two chars.  So for @gmail.com we get gm/ai/@gmail.com
    dir=${messageid##*([^@])}
    dir=${dir:1:2}/${dir:3:2}/$dir

    # escape [, ? and * from it (these are special chars for bash ${} expansion)
    diresc=${dir/[/\\[}
    diresc=${diresc/\*/\\*}
    diresc=${diresc/\?/\\?}

    # and fetch the part before the @ (i.e. strip $dir), and split in two levels
    # of dirs of two chars
    # NOTE(review): $dir already carries the "xx/yy/" prefix here while
    # $messageid contains no "/", so this suffix strip normally matches nothing
    # and $file stays the full message-id.  Left untouched to keep the on-disk
    # layout stable -- confirm against existing archives before changing.
    file=${messageid%%$diresc}
    dir=$dir/${file:0:2}/${file:2:2}

    # create the directory if needed
    if [[ ! -d "$destdir/$dir" ]]; then
      mkdir -p "$destdir/$dir" || echo "failed to create $destdir/$dir" >&2
    fi

    # and link the message into place
    if [[ ! -f "$destdir/$dir/$file" ]]; then
      ln "$message" "$destdir/$dir/$file" ||
        echo "failed to link $message to $destdir/$dir/$file" >&2
    fi
  done < <(formail -x Message-Id < "$message")

  rm "$message"
done

rmdir "$tempdir" # unless something remained