diff options
| author | Alvaro Herrera | 2009-05-24 15:02:27 +0000 |
|---|---|---|
| committer | Alvaro Herrera | 2009-05-24 15:02:27 +0000 |
| commit | 4ab3494e51c93653b77bc7ded2078e7b1bf7c48d (patch) | |
| tree | 1d514b629b7e57e4e55ea4deb3f5d5fd7463b99d /archives | |
| parent | b5108c4734910339e0d0b08a4400ea73115785e7 (diff) | |
Add script to split an mbox into individual messages labelled by Message-Id
git-svn-id: file:///Users/dpage/pgweb/svn-repo/trunk@2492 8f5c7a92-453e-0410-a47f-ad33c8a6b003
Diffstat (limited to 'archives')
| -rwxr-xr-x | archives/bin/splitmbox | 65 |
1 file changed, 65 insertions, 0 deletions
#!/bin/bash
#
# archives/bin/splitmbox (mode 100755)
#
# Split an mbox into message files, and sort them into directories according
# to message-id splitting.
#
# Usage: splitmbox MBOX
#
#   MBOX  path of the mbox to split.  A file whose name ends in ".gz" is read
#         through zcat, anything else through cat.
#
# Each message is hard-linked into the already existing directory "messages"
# (relative to the current directory) as
#
#     messages/<Message-Id from the first "@" on>/<Message-Id before the "@">
#
# A message carrying several Message-Id headers is linked once per id.  A
# message for which formail reports no Message-Id is not linked anywhere.
#
# NOTE: ln creates hard links, which only work when the temporary directory
# handed out by mktemp and the destination live on the same filesystem.
#
# This needs to run under bash because some of the ${} stuff is a bash-ism.
#
# $Id$

mbox=$1
destdir=messages

# *() and other patterns need this set
shopt -s extglob

# Quoted on purpose: with an empty or missing argument an unquoted test would
# collapse to "[ ! -f ]", which is false, and the script would carry on.
if [[ ! -f "$mbox" ]]; then
  echo "mbox $mbox not found" >&2
  exit 1
fi

if [[ ! -d "$destdir" ]]; then
  echo "destination directory $destdir does not exist" >&2
  exit 1
fi

# Choose the reader from the file name: "<something>.gz" is decompressed.
if [[ "${mbox##*/}" == ?*.gz ]]; then
  reader=zcat
else
  reader=cat
fi

tempdir=$(mktemp -d) || {
  echo "failed to create temporary directory" >&2
  exit 1
}
# The sh started by formail below reads tempdir from its environment.
export tempdir
# Remove the scratch directory on every exit path, including interruption.
trap 'rm -rf -- "$tempdir"' EXIT

# Burst the mbox: formail -s runs the command once per message and provides
# the message sequence number in FILENO.  Feeding the reader through stdin
# keeps an odd file name from being taken for an option.
"$reader" < "$mbox" | formail -s sh -c 'cat > "$tempdir/msg.$FILENO"'

# Without nullglob an mbox that yields no message would leave the literal
# pattern "msg.*" to be processed as if it were a file.
shopt -s nullglob

for message in "$tempdir"/msg.*; do
  # Fetch the Message-Id.  Note: due to braindamage at Microsoft we need to
  # cope with messages having more than one Message-Id, so we loop here.
  # read -r keeps backslashes in the id intact.
  formail -x Message-Id < "$message" | while read -r messageid; do
    # strip the initial " <", the trailing ">", and replace /'s with _
    messageid=${messageid##*([< ])}
    messageid=${messageid%%>}
    messageid=${messageid//\//_}
    # fetch the part after the first @ (inclusive)
    dir=${messageid##*([^@])}
    # and fetch the part before the @ (i.e. strip $dir).  Quoting $dir makes
    # bash match it literally, so [, ?, * and friends need no escaping.
    file=${messageid%"$dir"}
    # An empty name (or . / ..) would make ln drop "msg.N" into a directory
    # instead of creating the intended entry.
    if [[ -z "$file" || "$file" == . || "$file" == .. ]]; then
      echo "skipping unusable Message-Id \"$messageid\" in $message" >&2
      continue
    fi
    # create the directory if needed
    if [[ ! -d "$destdir/$dir" ]]; then
      if ! mkdir -v "$destdir/$dir"; then
        echo "failed to create $destdir/$dir" >&2
        continue
      fi
    fi
    # and link the message into place
    if [[ ! -f "$destdir/$dir/$file" ]]; then
      ln "$message" "$destdir/$dir/$file" ||
        echo "failed to link $message to $destdir/$dir/$file" >&2
    fi
  done
  rm -- "$message"
done
