head	1.23;
access;
symbols;
locks; strict;
comment	@# @;


1.23
date	2007.04.11.16.32.48;	author roderick;	state Exp;
branches;
next	1.22;

1.22
date	2007.04.06.20.19.54;	author roderick;	state Exp;
branches;
next	1.21;

1.21
date	2006.11.22.19.13.00;	author roderick;	state Exp;
branches;
next	1.20;

1.20
date	2006.10.17.14.07.20;	author roderick;	state Exp;
branches;
next	1.19;

1.19
date	2006.09.08.13.31.39;	author roderick;	state Exp;
branches;
next	1.18;

1.18
date	2005.03.01.16.57.15;	author roderick;	state Exp;
branches;
next	1.17;

1.17
date	2004.09.02.14.49.57;	author roderick;	state Exp;
branches;
next	1.16;

1.16
date	2002.12.18.18.21.58;	author roderick;	state Exp;
branches;
next	1.15;

1.15
date	2002.12.13.22.18.29;	author roderick;	state Exp;
branches;
next	1.14;

1.14
date	2002.05.06.14.43.59;	author roderick;	state Exp;
branches;
next	1.13;

1.13
date	2002.01.21.18.35.41;	author roderick;	state Exp;
branches;
next	1.12;

1.12
date	2001.09.13.18.31.42;	author roderick;	state Exp;
branches;
next	1.11;

1.11
date	2001.07.27.16.24.39;	author roderick;	state Exp;
branches;
next	1.10;

1.10
date	2000.05.30.22.09.27;	author roderick;	state Exp;
branches;
next	1.9;

1.9
date	2000.05.04.16.41.35;	author roderick;	state Exp;
branches;
next	1.8;

1.8
date	99.04.10.08.56.18;	author roderick;	state Exp;
branches;
next	1.7;

1.7
date	99.03.25.16.22.10;	author roderick;	state Exp;
branches;
next	1.6;

1.6
date	97.12.01.18.43.59;	author roderick;	state Exp;
branches;
next	1.5;

1.5
date	97.11.07.18.28.52;	author roderick;	state Exp;
branches;
next	1.4;

1.4
date	97.10.13.16.05.11;	author roderick;	state Exp;
branches;
next	1.3;

1.3
date	97.07.18.19.13.39;	author roderick;	state Exp;
branches;
next	1.2;

1.2
date	97.07.11.21.31.08;	author roderick;	state Exp;
branches;
next	1.1;

1.1
date	97.07.11.18.20.24;	author roderick;	state Exp;
branches;
next	;


desc
@@


1.23
log
@Typo.
@
text
@#!/usr/bin/perl -w
use strict;

# $Id: mbox-purge,v 1.22 2007-04-06 16:19:54-04 roderick Exp roderick $
#
# Copyright (c) 1997 Roderick Schertler.  All rights reserved.  This
# program is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself.

use sigtrap qw(die untrapped normal-signals);

use POSIX		qw(:errno_h);
use Proc::WaitStat	qw(waitstat_die);
use RS::Handy		qw(:stat $Me
			    badinvo chompexpr_fileline dstr data_dump
			    exclusive_create mbox_read_head mbox_read_body
			    mbox_escape mbox_escape_body_part_in_place xdie);

# Compile code from the user.  This comes before anything else so it
# can't access my lexicals.

sub user_eval {
    @@_ == 1 || badinvo;

    local $SIG{__DIE__};
    no strict 'vars';
    return eval shift;
}

my $Usage = <<EOF;
usage: $Me [switch]... mbox_file...
switches:
 -c, --copy-to file	copy messages which are being deleted to file
     --debug		turn debugging on
     --help		show the help and die
     --newline s	use s as the line terminator rather than guessing
     --newline-native	use the native line terminator rather than guessing
 -n, --no		don't actually modify any files
     --quiet		suppress informational messages
     --verbose		output additional informational messages
     --version		show the version and exit
criteria:
     --before       date-time	delivered before date-time
     --before-or-at date-time	delivered before or exactly at date-time
     --after        date-time	delivered after  date-time
     --after-or-at  date-time	delivered after  or exactly at date-time
     --pattern pat	head+body match   Perl regex /pat/m
     --head-pattern pat	head      matches Perl regex /pat/m
     --body-pattern pat	body      matches Perl regex /pat/m
     --eval code	\$code->(\\\$head, \\\$body, \\\$msg) returns true
     --head-eval code	\$code->(\\\$head) returns true

Any messages in the given files which match all the criteria are deleted.

Use \`perldoc $Me\' to see the full documentation.
EOF

my $Copy_mbox	= undef;
my $Debug	= 0;
my $Exit	= 0;
my $Guess_line_endings = 1;
my $Lock_max_age = 60;
my $No		= 0;
my $Quiet	= 0;
my @@Tmp		= ();
my $Verbose	= 0;
my $Version	 = q$Revision: 1.22 $ =~ /(\d\S+)/ ? $1 : '?';

# globals with info about current message
my $File_name	= undef;
my $Msg_num	= undef;
my %Message_info = ();

# first found is used, sub can return either undef or -1 on failure
my @@Parse_date	= qw(Date::Parse::str2time Date::GetDate::getdate);

sub xwarn {
    RS::Handy::xwarn @@_;
    $Exit ||= 1;
}

sub usage {
    xwarn @@_ if @@_;
    die $Usage;
}

sub info {
    print "$Me: ", @@_, "\n" unless $Quiet;
}

sub verbose {
    print "$Me: ", @@_, "\n" if $Verbose;
}

sub debug {
    print "debug: ", @@_, "\n" if $Debug;
}

sub create_tmp {
    my ($orig) = @@_;
    my ($base, $ext, $new, $fh);

    $base = "$orig.tmp.$$";
    $ext = 1;
    $new = $base;
    until ($fh = exclusive_create $new) {
	$! == EEXIST or xdie "can't create $new:";
	xdie "can't create a file named like $base.* in $ext tries\n"
	    if $ext == 100;
	$ext++;
	$new = "$base.$ext";
    }
    return $new, $fh;
}


{ my %locked;
sub lock_file {
    @@_ == 1 || badinvo;
    my ($file) = @@_;

    debug "lock $file";
    return if $locked{$file};
    my $lock_file = "$file.lock";
    system qw(lockfile -1 -r 10), $lock_file;
    waitstat_die $?, "lockfile for $lock_file";
    push @@Tmp, $lock_file;
    $locked{$file} = time;
}

# Other processes can think a lock is stale if it's old (procmail does
# this by default, after about 17 minutes with my version), so touch
# the lock files periodically.

sub touch_locks {
    my $t = time;
    while (my ($file, $t0) = each %locked) {
    	my $lock = "$file.lock";
	my $age = $t - $t0;
	my $do_touch = $age > $Lock_max_age;
	next unless $do_touch;
	debug "lock $lock age $age do_touch $do_touch";
	utime $t, $t, $lock
	    or xdie "can't touch $lock:";
	$locked{$file} = $t;
    }
}

sub unlock_file {
    @@_ == 1 || badinvo;
    my ($file) = @@_;

    debug "unlock $file";
    $locked{$file}
	or xdie "attempty to unlock file which isn't locked: $file\n";
    my $lock_file = "$file.lock";
    unlink $lock_file
	or xdie "error unlinking $lock_file:";
    @@Tmp = grep { $_ ne $lock_file } @@Tmp;
    delete $locked{$file};
} }

{ my $sub;
sub parse_date {
    my ($in) = @@_;

    if (!$sub) {
    	for my $full (@@Parse_date) {
	    (my $mod = $full) =~ s/::[^:]+$// or die;
	    next unless eval "require $mod";
	    if (!defined &$full) {
		xwarn "$mod doesn't define $full\n";
		next;
	    }
	    debug "parse_date using $full";
	    $sub = do { no strict 'refs'; \&$full };
	    last;
	}
	$sub or xdie "no date parsing function available, I tried to load:",
	    	" @@Parse_date\n";
    }

    my $t = $sub->($in);
    if (defined $t && $t == -1) {
	$t = undef;
    }

    if ($Debug) {
	my $out = defined $t ? localtime $t : undef;
	debug sprintf "%s -> %s (%s)", map { defined $_ ? $_ : 'undef' }
		$in, $t, $out;
    }

    return $t;
} }

sub parse_from_line {
    @@_ == 0 || badinvo;

    return if exists $Message_info{from_line};

    if (${ $Message_info{rhead} } !~ /^(From\s+.*)/) {
	info "no From_ line in message $Msg_num of $File_name";
	$Message_info{from_line} = undef;
	return;
    }
    $Message_info{from_line} = $1;

    if ($Message_info{from_line}
	  !~ /^From \s+ (.*?) \s+ (\w\w\w \s \w\w\w \s+ \d+ \s+ \d+:\d+.*)/x) {
	info "can't parse From_ line in message $Msg_num of $File_name";
	return;
    }

    $Message_info{from_sender}	= $1;
    $Message_info{from_date}	= $2;

    $Message_info{delivery_time} = parse_date $Message_info{from_date}
	or info "invalid delivery date ($Message_info{from_date})",
		" in message $Msg_num of $File_name";
}

sub delivery_time {
    @@_ == 0 || badinvo;

    parse_from_line unless exists $Message_info{from_line};
    return $Message_info{delivery_time};
}

sub envelope_sender {
    @@_ == 0 || badinvo;

    parse_from_line unless exists $Message_info{from_line};
    return $Message_info{from_sender};
}

sub header_all {
    @@_ == 1 || badinvo;
    my ($pat) = @@_;

    return ${ $Message_info{rhead} } =~ /^$pat\s*:\s*(.*)/gim;
}

sub header_first {
    return (header_all @@_)[0];
}

sub header_last {
    return (header_all @@_)[-1];
}

sub mozilla_expunged {
    @@_ == 0 || badinvo;

    my $ms = header_first 'X-Mozilla-Status';
    return defined $ms
    	    	&& $ms =~ /^[\da-f]+\z/
		&& hex($ms) & 8;
}

{ my %fh;
sub mbox_append {
    my ($file, $rmsg) = @@_;

    lock_file $file;
    my $fh = $fh{$file} ||= do {
    	debug "open $file for appending";
    	require Symbol;
	my $fh = Symbol::gensym();
	open $fh, ">>$file"
	    or xdie "can't append to $file:";
	$fh
    };

    print $fh mbox_escape ${ $rmsg }
	or xdie "error appending to $file:";
}
END {
    while (my ($file, $fh) = each %fh) {
	debug "closing $file";
	if (!close $fh) {
	    xwarn "error closing $file:";
	    $? ||= 1;
	}
    }
} }

# user's accessors for globals

sub file_name {
    return $File_name;
}

sub msg_num {
    return $Msg_num;
}

sub main {
    my (@@rule, $any_date);

    @@ARGV || usage;

    while (@@ARGV && $ARGV[0] =~ /^-/) {
	$_ = shift @@ARGV;
	if ($_ eq '--') {
	    last;
	}
	elsif ($_ eq '--copy-to' || $_ eq '-c') {
	    @@ARGV or xdie "no arg for $_\n";
	    $Copy_mbox = shift @@ARGV;
	}
	elsif (/^--?debug\z/) {
	    $Debug = 1;
	}
	elsif ($_ eq '--help') {
	    usage;
	}
	elsif ($_ eq '--newline') {
	    @@ARGV or xdie "no arg for $_\n";
	    $/ = shift @@ARGV;
    	    $Guess_line_endings = 0;
    	}
	elsif ($_ eq '--newline-native') {
    	    $/ = "\n";
    	    $Guess_line_endings = 0;
    	}
	elsif ($_ eq '--no' || $_ eq '-n') {
	    $No = 1;
	}
	elsif (/^--?quiet\z/) {
	    $Quiet = 1;
	}
	elsif (/^--?verbose\z/) {
	    $Verbose = 1;
	}
	elsif ($_ eq '--version') {
	    print "$Me version $Version\n";
	    exit 0;
	}
	elsif (/^--?((before|after)(-or-at)?)\z/) {
	    my $rule = $1;
	    @@ARGV or xdie "no arg for $rule\n";
	    my $spec = shift @@ARGV;
	    my $time = parse_date $spec;
	    defined $time && $time > 0 or xdie "invalid time `$spec'\n";
	    # getdate() has ambituities (eg, 040101 = 2004-01-01), so
	    # help by choking on dates in the future.
	    if ($time > time) {
		xdie "$rule value is in the future (",
		    scalar localtime $time, ")\n";
	    }
	    push @@rule, [$rule, $time];
	    $any_date = 1;
	}
	elsif (/^--?((head-|body-)?pattern)\z/) {
	    my $rule = $1;
	    @@ARGV or xdie "no arg for $rule\n";
	    my $pat = shift @@ARGV;
	    my $sub = eval 'sub { ${ $_[0] } =~ /$pat/om }';
	    # Validate and compile the pattern by calling the closure
	    # for the first time.
	    eval { $sub->(\ "") };		# space after \ helps emacs
	    if ($@@) {
		$@@ = chompexpr_fileline $@@;
		xdie "invalid $_ $@@\n";
	    }
	    push @@rule, [$rule, $sub];
	}
	elsif (/^--?((head-)?eval)\z/) {
	    my $rule = $1;
	    @@ARGV or xdie "no arg for $rule\n";
	    my $code = shift @@ARGV;
	    my $sub = user_eval "sub { $code }";
	    if ($@@) {
		$@@ =~ s/ at .eval \d+. line \d+.\n//;
		xdie "invalid $_ code `$code': $@@\n";
	    }
	    push @@rule, [$rule, $sub];
	}
	else {
	    usage "invalid switch $_\n";
	}
    }
    @@rule or usage "no rules specified\n";
    @@ARGV or usage "no files specified\n";

    # XXX wrap the whole file loop in an eval, go to next file on a
    # failure

    for my $file_name (@@ARGV) {
	my (@@stat, $new_file, $new_fh, $n_kept, @@stat2);

    	$File_name = $file_name;
	verbose "processing $File_name";

	lock_file $File_name;

	open FILE, $File_name or xdie "can't read $File_name:";
	@@stat = stat FILE or xdie "error statting open $File_name:";

    	# Try to guess the line endings used in the file.

	if ($Guess_line_endings) {
	    my $buf = '';
	    my $nread = read FILE, $buf, 512;
	    if (!defined $nread) {
		xdie "error reading from $File_name:";
	    }
	    elsif (!$nread) {
		$/ = "\n"; # won't matter
	    }
	    elsif ($buf =~ /^([^\x0d\x0a]*(\x0d\x0a|\x0d|\x0a))/) {
		$/ = $2;
	    }
	    else {
	    	xwarn "can't intuit line endings for $File_name";
		$/ = "\n";
	    }
	    seek FILE, 0, 0
		or xdie "can't rewind $File_name:";
	    debug "line endings are ", dstr $/;
	}

	($new_file, $new_fh) = create_tmp $File_name;
	push @@Tmp, $new_file;

	# XXX These are a security hole when this is run as root on user's
	# files.  I need fchmod() and fchown().
	chmod $stat[ST_MODE], $new_file
	    or xdie "can't chmod $new_file:";
	chown @@stat[ST_UID, ST_GID], $new_file
	    or xdie "can't chmod $new_file:";

	$Msg_num = $n_kept = 0;
	while (my ($orig_head, $clen) = mbox_read_head *FILE) {
	    my ($head, $body, $msg, $keep, $delivery_time);
	    %Message_info = ();

	    touch_locks;

	    my $read_body = sub {
		$body = mbox_read_body *FILE, 0, $clen;
		$msg = "$orig_head$/$body";
	    };

	    ($head = $orig_head) =~ s|$/[ \t]+| |g;
	    $Msg_num++;
	    $Message_info{rhead} = \$head;

	    # I'm not using Mail::Header because it doesn't handle
	    # From_.*\n>From headers.
	    if ($any_date) {
	    	parse_from_line;
		$delivery_time = $Message_info{delivery_time};
	    }

	    $keep = 0;
	    for my $rrule (@@rule) {
		my ($rule, @@arg) = @@$rrule;

		# This is done a little backwards.  The default is to
		# purge messages.  If a rule matches (meaning to purge
		# this message) a simple next is done.  Any rule which
		# doesn't match (meaning to keep this message) falls to
		# the bottom from whence the loop is exited (since I
		# only purge if all rules match).
		if ($rule eq 'before') {
    	    	    next if defined $delivery_time && $delivery_time < $arg[0];
		}
		elsif ($rule eq 'before-or-at') {
    	    	    next if defined $delivery_time && $delivery_time <= $arg[0];
		}
		elsif ($rule eq 'after') {
    	    	    next if defined $delivery_time && $delivery_time > $arg[0];
		}
		elsif ($rule eq 'after-or-at') {
    	    	    next if defined $delivery_time && $delivery_time >= $arg[0];
		}
		elsif ($rule eq 'pattern') {
		    $read_body->() if !defined $body;
    	    	    next if $arg[0]->(\$msg);
		}
		elsif ($rule eq 'head-pattern') {
    	    	    next if $arg[0]->(\$head);
		}
		elsif ($rule eq 'body-pattern') {
		    $read_body->() if !defined $body;
    	    	    next if $arg[0]->(\$body);
		}
		elsif ($rule eq 'eval') {
		    $read_body->() if !defined $body;
		    #print data_dump ['real args', \$head, \$body, \$msg];
		    next if $arg[0]->(\$head, \$body, \$msg);
		}
		elsif ($rule eq 'head-eval') {
		    #print data_dump ['real args', \$head];
		    next if $arg[0]->(\$head);
		}
		else {
		    xdie "bug: bad rule `$rule'\n";
		}
		# This rule didn't match, therefore keep this message.
		$keep = 1;
		last;
	    }

	    if (!$keep) {
		debug "$Msg_num purge";
	    	if (defined $Copy_mbox && !$No) {
		    # XXX it'd be better to have mbox_append be able
		    # to work with the callback-using version of
		    # mbox_read_body so you didn't have to keep the
		    # message in memory
		    $read_body->() if !defined $body;
		    mbox_append $Copy_mbox, \$msg;
		}
		else {
		    mbox_read_body *FILE, 1, $clen if !defined $body;
		}
		next;
	    }

    	    debug "$Msg_num keep";
	    if (defined $body) {
	    	debug "$Msg_num already read";
		print $new_fh mbox_escape $msg
		    or xdie "error writing to $new_file:";
	    }
	    else {
	    	debug "$Msg_num using callback";
		print $new_fh $orig_head, $/
		    or xdie "error writing to $new_file:";
		my $len = mbox_read_body *FILE, sub {
			mbox_escape_body_part_in_place $_[0];
			print $new_fh $_[0]
			    or xdie "error writing to $new_file:";
		    }, $clen;
		if ($len) {
		    print $new_fh $/
			or xdie "error writing to $new_file:";
		}
	    }
	    $n_kept++;
	}
	my $n_dropped = $Msg_num - $n_kept;

	close $new_fh
	    or xdie "error closing $new_file:";

	@@stat2 = stat FILE
	    or xdie "error doing stat 2 on open $File_name:";

	$stat[ST_MTIME] == $stat2[ST_MTIME]
	    	&& $stat[ST_SIZE] == $stat2[ST_SIZE]
	    # XXX probably shouldn't remove the lock file in this case,
	    # or use the open handle on it I want to use and funlink
	    # or at least try (race!) to see if it's mine first
	    or xdie "$File_name was modified while I had it locked\n";

	close FILE
	    or xdie "error closing $File_name:";

	@@Tmp = grep { $_ ne $new_file } @@Tmp;
	if ($No || $n_dropped == 0) {
	    unlink $new_file
		or xdie "error unlinking $new_file:"
	}
	else {
	    rename $new_file, $File_name
		or xdie "error renaming $new_file to $File_name:";

	    # $file.msf is a Mozilla index file, it isn't valid since
	    # $file has changed.  Remove it so Mozilla will regenerate
	    # it.

	    my $msf = "$File_name.msf";
	    unlink $msf
		or $! == ENOENT
    	    	or xwarn "error unlinking $msf:";

	    unlink $File_name
		    or xdie "error unlinking $File_name:"
		if $n_kept == 0;
	}

	unlock_file $File_name;

	info sprintf "%5d kept  %5d discarded  %s",
		$n_kept, $n_dropped, $File_name;
    }

    return 0;
}

END {
    for (@@Tmp) {
	unless (unlink) {
	    xwarn "error unlinking $_:";
	    $? = 1 unless $?;
	}
    }
}

$Exit = main || $Exit;
$Exit = 1 if $Exit and not $Exit % 256;
exit $Exit;

__END__

=head1 NAME

mbox-purge - perform batch deletion of mail messages from mbox files

=head1 SYNOPSIS

B<mbox-purge>
[B<--copy-to> I<file>]
[B<--debug>]
[B<--help>]
[B<--newline> I<s>]
[B<--newline-native>]
[B<--no>] [B<-n>]
[B<--quiet>]
[B<--verbose>]
[B<--version>]
[B<--before> I<date-time>]
[B<--before-or-at> I<date-time>]
[B<--after> I<date-time>]
[B<--after-or-at> I<date-time>]
[B<--pattern> I<pat>]
[B<--head-pattern> I<pat>]
[B<--body-pattern> I<pat>]
[B<--eval> I<code>]
[B<--head-eval> I<code>]
I<file>...

=head1 DESCRIPTION

B<mbox-purge> performs batch deletion of email messages from mbox format
files based on rules you specify.  It uses F<file.lock>-style locking
(using B<procmail>'s B<lockfile> under the hood).  Because of this you
have to have write permission in the directory in which the I<file>
being processed is stored.

The file to be processed can be in mbox, mboxrd, mboxcl, mboxcl2 or
buggy Elm mboxcl2 format.  The data written will always be in mboxrd
format.  See http://www.qmail.org/qmail-manual-html/man5/mbox.html for
an explanation of these terms.

=head1 OPTIONS - GENERAL

=over

=item B<--copy-to> I<file>

=item B<-c> I<file>

Append a copy of each deleted message to I<file>.

=item B<--debug>

Turn debugging on.

=item B<--help>

Show the help and die.

=item B<--newline> I<s>

Use s as the line terminator rather than guessing based on the first
line of each file.

=item B<--newline-native>

Use this platform's native line terminator rather than guessing based on
the first line of each file.

=item B<--no>

=item B<-n>

Don't actually modify any files, just go through the motions.

=item B<--quiet>

Suppress informational messages.

=item B<--verbose>

Output additional informational messages.

=item B<--version>

Show the version and exit.

=back

=head1 OPTIONS - MESSAGE SELECTION

If multiple rules are given they all have to match for a message to be
purged.

=over

=item B<--before> I<date-time>

=item B<--before-or-at> I<date-time>

=item B<--after> I<date-time>

=item B<--after-or-at> I<date-time>

Purge messages which were delivered in the specified period.  The
date/time parsing is flexible, as provided by Date::Parse.  I usually
use I<YYYY>-I<MM>-I<DD> I<HH>:I<MM>:I<SS>.  If you leave off the time
it defaults to 00:00:00.

=item B<--pattern> I<pat>

Purge messages which match I<pat>.  The pattern is run against the message
after its mbox-style encoding has been unescaped.  The match uses
Perl's //m flag.

=item B<--head-pattern> I<pat>

Purge messages which match I<pat>.  The pattern is run against the message
after its mbox-style encoding has been unescaped.  The match uses
Perl's //m flag.

Additionally, the headers have line continuations undone (newline
followed by whitespace is replaced with a single space) before the
match.

=item B<--body-pattern> I<pat>

Purge messages whose bodies match I<pat>.  The pattern is run against
the message after its mbox-style encoding has been unescaped.  The match
uses Perl's //m flag.

=item B<--eval> I<code>

Evaluate I<code> and purge the message if it returns true.  I<code> is
compiled as the body of a subroutine.  The subroutine receives references
to the head, body and full text of the message as its arguments.  The head
argument has had continuation lines undone, and the body in both of the
second arguments has had its mbox encoding unescaped.  See
also L</CONVENIENCE SUBS>.

=item B<--head-eval> I<code>

Like B<--eval> but the only arg passed to the I<code> is a reference to
the message header.  Use this if you don't need the message body, to
avoid having to put it in memory.

=back

=head1 CONVENIENCE SUBS

Here are some subs you can use from code passed in via B<-eval>:

=over

=item B<delivery_time>

Return the epoch time() when the message was delivered, as read
from the From_ line.

=item B<envelope_sender>

Return the envelope sender, as read from the From_ line.

=item B<file_name>

Return the name of the file being processed.

=item B<header_all> I<header_pattern>

Return the data part of all the header lines whose field names match
I<header_pattern>.  Eg,

    my @@recv = header_all 'Received';

=item B<header_first> I<header_pattern>

=item B<header_last> I<header_pattern>

These are like B<header_all>, but they only return the first or last
matching header.

=item B<mbox_append> I<file>, I<ref-to-string>

Append the I<ref-to-string> to the mbox-format I<file>, doing appropriate
escaping and locking.  Typically I<ref-to-string> will be C<$_[2]>.

=item B<mozilla_expunged>

True if the message has been marked as expunged by Mozilla.  Such
messages are effectively deleted, but haven't been removed from the
mailbox file yet.

=item B<msg_num>

Return the index of this message in the current file (starts at 1).

=item B<parse_date> I<str>

Return the epoch time() which corresponds to I<str>, or B<undef>.

=back

=head1 EXAMPLES

    # Delete messages older than the given date from all your folders.

    mbox-purge --before 2000-05-01 ~/Mail/*

    # Delete messages from April 2000.

    mbox-purge --after-or-at 2000-04-01 --before 2000-05-01 file

    # Move messages delivered in the year 2000 from the listed files
    # into a single file.

    mbox-purge --after-or-at 2000-01-01 --before 2001-01-01 \
    	--copy-to 2000.mbox file1 file2 file3

    # Delete a chain letter from all user's mailboxes.

    mbox-purge \
	--head-pattern '^Subject: (Re: )?GOOD LUCK TOTEM( \(fwd\))?$' \
    	/var/spool/mail/*

    # Perform equivalent of Mozilla folder compaction.

    mbox-purge --eval mozilla_expunged file

    # Delete messages larger than 1M.

    mbox-purge --eval 'length ${ $_[2] } > 1_000_000' file

    # Delete messages older than 6 months from all your folders.

    mbox-purge --eval 'time - delivery_time > 60*60*24 * 31 * 6' ~/Mail/*

    # Same, but use the Date: field's date rather than the delivery date.

    mbox-purge --eval 'time - parse_date(header_first "Date")
    	    	    	> 60*60*24 * 31 * 6' ~/Mail/*

    # Move messages older than 6 months into by-year archive folders, keeping
    # separate archives for each source folder.  Eg, for ~/Mail/sent you
    # get ~/Mail/sent.2000, ~/Mail/sent.2001, etc.

    find ~/Mail ! -name '*.[0-9][0-9][0-9][0-9]' ! -name '.*' -type f \
    	-print0 | xargs -0r mbox-purge \
	--eval '
	    return 0 if time - delivery_time() < 60*60*24 * 31 * 6;
	    require POSIX;
	    my $year = POSIX::strftime("%Y", localtime delivery_time);
	    mbox_append file_name . ".$year", $_[2];
	    1'

=head1 BUGS

You can't delete from your mail spool on a system which doesn't have a
world-writable spool directory if you're a regular user, both because
B<mbox-purge> doesn't know to special-case B<lockfile>'s invocation for
that and because it creates the temporary file in the same directory as
the file it is processing.

=head1 CHANGES

  $Log: mbox-purge,v $
  Revision 1.22  2007-04-06 16:19:54-04  roderick
  Add --copy-to, --before-or-at, --after-or-at.

  Add convenience subs:  mbox_append(), file_name(), msg_num().

  Touch lock files periodically so other processes don't think they're
  stale.

  Revision 1.21  2006-11-22 14:13:00-05  roderick
  Support differing line terminators ($/); guess the right value on a
  file-by-file basis by default.  Add --newline, --newline-native to
  override this.

  Avoid holding a whole message body in memory when possible by using the
  (new) callback-enabled version of mbox_read_body().  Add --head-eval to
  make it possible in more cases.

  Add -n as alias for --no.

  Add mozilla_expunged() convenience sub.

  Revision 1.20  2006-10-17 10:07:20-04  roderick
  Use chompexpr_fileline.

  Revision 1.19  2006-09-08 09:31:39-04  roderick
  Also check the file's size when detecting modifications.

  When you modify a mailbox remove a .msf file (Mozilla index) if present.

  Revision 1.18  2005-03-01 11:57:15-05  roderick
  Oops, set $File_name correctly.

  Revision 1.17  2004-09-02 10:49:57-04  roderick
  Important changes:

      Use the delivery date rather than the Date: header for --before and
      --after.

      Add and prefer --switch to -switch, but still allow the latter for
      old switches.

      Add convenience subs:  delivery_time(), envelope_sender(), header_all(),
      header_first(), header_last().

      Add --help, --no, --quiet, --verbose, --version.

  Less important:

      If no messages were purged from a file, leave it as is rather than
      replacing it with the new (identical) copy.

      Treat a parsed date of -1 as undef.

      Add %Message_info, $File_name, $Msg_num.

      Improve the usage message.

      For --eval, turn off strict vars, and don't let the user get at my
      lexicals.

      Don't trap signals which were ignored.


=head1 AUTHOR

Roderick Schertler <F<roderick@@argon.org>>

=cut
@


1.22
log
@Add --copy-to, --before-or-at, --after-or-at.

Add convenience subs:  mbox_append(), file_name(), msg_num().

Touch lock files periodically so other processes don't think they're
stale.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.21 2006-11-22 14:13:00-05 roderick Exp roderick $
d67 1
a67 1
my $Version	 = q$Revision: 1.21 $ =~ /(\d\S+)/ ? $1 : '?';
d793 1
a793 1
escaping and locking.  Typicall I<ref-to-string> will be C<$_[2]>.
d874 8
@


1.21
log
@Support differing line terminators ($/); guess the right value on a
file-by-file basis by default.  Add --newline, --newline-native to
override this.

Avoid holding a whole message body in memory when possible by using the
(new) callback-enabled version of mbox_read_body().  Add --head-eval to
make it possible in more cases.

Add -n as alias for --no.

Add mozilla_expunged() convenience sub.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.20 2006-10-17 10:07:20-04 roderick Exp roderick $
d33 1
d43 4
a46 2
     --before date	delivered before date
     --after date	delivered after  date
d58 1
d62 1
d67 1
a67 1
my $Version	 = q$Revision: 1.20 $ =~ /(\d\S+)/ ? $1 : '?';
d116 47
d261 37
d308 4
d340 1
a340 1
	elsif (/^--?(before|after)\z/) {
d391 1
a391 1
	my ($lock_file, @@stat, $new_file, $new_fh, $n_kept, @@stat2);
d396 1
a396 7
	$lock_file = "$File_name.lock";
	system qw(lockfile -1 -r 10), $lock_file;
	waitstat_die $?, "lockfile for $lock_file";
	push @@Tmp, $lock_file;

	# XXX keep a handle on the lock file open and periodically touch
	# it?
d439 2
d470 3
d476 3
d508 12
a519 1
		mbox_read_body *FILE, 1, $clen if !defined $body;
d523 1
d586 1
a586 2
	unlink $lock_file	or xdie "error unlinking $lock_file:";
	@@Tmp = grep { $_ ne $lock_file } @@Tmp;
d617 1
d622 1
a622 1
[B<--no>]
d626 4
a629 2
[B<--before> I<date>]
[B<--after> I<date>]
d654 6
d680 2
d705 1
a705 1
=item B<--before> I<date>
d707 1
a707 1
Purge messages delivered before I<date>.
d709 1
a709 1
=item B<--after> I<date>
d711 6
a716 1
Purge messages delivered after I<date>.
d772 4
d790 5
d801 4
d819 7
a825 1
    mbox-purge --before 2000-05-01 --after 2000-03-31 file
d833 4
d843 1
a843 1
    mbox-purge --eval 'time - delivery_time > 60*60*24 * 30 * 6' ~/Mail/*
d848 1
a848 3
    	    	    	> 60*60*24 * 30 * 6' ~/Mail/*

    # Perform equivalent of Mozilla folder compaction.
d850 12
a861 1
    mbox-purge --eval mozilla_expunged file
d871 1
a871 1
=head1 TODO
d873 9
a881 1
  - Add --purged-to (name?) to output purged messages somewhere?
d883 3
a885 1
=head1 CHANGES
a886 1
  $Log: mbox-purge,v $
@


1.20
log
@Use chompexpr_fileline.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.19 2006-09-08 09:31:39-04 roderick Exp roderick $
d15 3
a17 3
			    badinvo chompexpr_fileline exclusive_create
			    mbox_read_head mbox_read_body mbox_escape
			    xdie);
d33 8
a40 6
    --debug		turn debugging on
    --help		show the help and die
    --no		don't actually modify any files
    --quiet		suppress informational messages
    --verbose		output additional informational messages
    --version		show the version and exit
d42 7
a48 6
    --before date	delivered before date
    --after date	delivered after  date
    --pattern pat	head+body match   Perl regex /pat/m
    --head-pattern pat	head      matches Perl regex /pat/m
    --body-pattern pat	body      matches Perl regex /pat/m
    --eval code		\$code->(\\\$head, \\\$body, \\\$msg) returns true
d57 1
d62 1
a62 1
my $Version	 = q$Revision: 1.19 $ =~ /(\d\S+)/ ? $1 : '?';
d200 9
d225 10
a234 1
	elsif ($_ eq '--no') {
d276 1
a276 1
	elsif (/^--?(eval)\z/) {
d283 1
a283 1
		xdie "invalid -eval code `$code': $@@\n";
d294 3
d314 23
d354 1
a354 1
		$msg = "$orig_head\n$body";
d357 1
a357 1
	    ($head = $orig_head) =~ s/\n[ \t]+/ /g;
d397 1
d400 4
d416 20
a435 1
	    $read_body->() if !defined $body;
a436 2
	    print $new_fh mbox_escape $msg
			or xdie "error writing to $new_file:";
d513 2
d525 1
d553 10
d627 6
d664 6
d705 4
a716 3
mboxcl2 format is currently broken, RS::Handy::mbox_read_body() needs to
be fixed.

d724 3
@


1.19
log
@Also check the file's size when detecting modifications.

When you modify a mailbox remove a .msf file (Mozilla index) if present.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.18 2005-03-01 11:57:15-05 roderick Exp roderick $
d14 4
a17 2
use RS::Handy		qw(:stat $Me xdie badinvo exclusive_create
			    mbox_read_head mbox_read_body mbox_escape);
d58 1
a58 1
my $Version	 = q$Revision: 1.18 $ =~ /(\d\S+)/ ? $1 : '?';
d249 2
a250 2
		$@@ =~ s/ at .eval \d+. line \d+.\n//;
		xdie "invalid pattern `$pat': $@@\n";
d283 3
d378 3
d628 5
@


1.18
log
@Oops, set $File_name correctly.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.17 2004-09-02 10:49:57-04 roderick Exp roderick $
d56 1
a56 1
my $Version	 = q$Revision: 1.17 $ =~ /(\d\S+)/ ? $1 : '?';
d372 1
d386 10
d620 3
@


1.17
log
@Important changes:

    Use the delivery date rather than the Date: header for --before and
    --after.

    Add and prefer --switch to -switch, but still allow the latter for
    old switches.

    Add convenience subs:  delivery_time(), envelope_sender(), header_all(),
    header_first(), header_last().

    Add --help, --no, --quiet, --verbose, --version.

Less important:

    If no messages were purged from a file, leave it as is rather than
    replacing it with the new (identical) copy.

    Treat a parsed date of -1 as undef.

    Add %Message_info, $File_name, $Msg_num.

    Improve the usage message.

    For --eval, turn off strict vars, and don't let the user get at my
    lexicals.

    Don't trap signals which were ignored.
@
text
@d4 1
a4 1
# $Id$
d56 1
a56 1
my $Version	 = q$Revision$ =~ /(\d\S+)/ ? $1 : '?';
d270 1
a270 1
    for $File_name (@@ARGV) {
d273 1
d608 31
a638 1
  $Log$
@


1.16
log
@Oops, use parse_date() rather than getdate() when parsing the Date:
field of the messages.

Oops, messages with bad dates should never match -before/-after.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.15 2002-12-13 17:18:29-05 roderick Exp roderick $
d10 1
a10 1
use sigtrap qw(die normal-signals);
d14 1
a14 1
use RS::Handy		qw(:stat $Me xdie exclusive_create
d17 33
d52 1
d56 6
d73 1
a73 1
    die "usage: $Me [switch]... mbox...\n";
d126 3
d139 55
d204 1
a204 1
	elsif ($_ eq '-debug') {
d207 18
a224 2
	elsif ($_ eq '-before' || $_ eq '-after') {
	    my $rule = $_;
d238 2
a239 2
	elsif ($_ =~ /^-(head-|body-)?pattern$/) {
	    my $rule = $_;
d252 2
a253 2
	elsif ($_ eq '-eval') {
	    my $rule = $_;
d256 1
a256 1
	    my $sub = eval "sub { $code }";
d264 1
a264 1
	    xdie "invalid switch $_\n";
d270 2
a271 2
    for my $file (@@ARGV) {
	my ($lock_file, @@stat, $new_file, $new_fh, $n_tot, $n_kept, @@stat2);
d273 1
a273 1
	verbose "processing $file";
d275 1
a275 1
	$lock_file = "$file.lock";
d280 2
a281 2
	open FILE, $file or xdie "can't read $file:";
	@@stat = stat FILE or xdie "error statting open $file:";
d283 1
a283 1
	($new_file, $new_fh) = create_tmp $file;
d286 2
a287 1
	# XXX These are a security hole.  I need fchmod() and fchown().
d293 1
a293 1
	$n_tot = $n_kept = 0;
d295 2
a296 1
	    my ($head, $body, $msg, $keep, $date, $time);
d304 2
a305 1
	    $n_tot++;
d310 2
a311 9
		$date = ($head =~ /^Date\s*:\s*(.*\S)/mi) ? $1 : undef;
		defined $date
		    or info "no date header in message $n_tot of $file";

		if (defined $date) {
		    $time = parse_date $date;
		    defined($time) && $time > 0
			or info "invalid date in message $n_tot of $file ($date)";
		}
d324 2
a325 2
		if ($rule eq '-before') {
    	    	    next if defined $time && $time < $arg[0];
d327 2
a328 2
		elsif ($rule eq '-after') {
    	    	    next if defined $time && $time > $arg[0];
d330 1
a330 1
		elsif ($rule eq '-pattern') {
d334 1
a334 1
		elsif ($rule eq '-head-pattern') {
d337 1
a337 1
		elsif ($rule eq '-body-pattern') {
d341 1
a341 1
		elsif ($rule eq '-eval') {
d362 1
d368 1
a368 1
	    or xdie "error doing stat 2 on open $file:";
d371 1
a371 1
	    or xdie "$file was modified while I had it locked\n";
d374 1
a374 1
	    or xdie "error closing $file:";
d377 11
a387 4
	rename $new_file, $file	or xdie "error renaming $new_file to $file:";

	unlink $file		or xdie "error unlinking $new_file:"
	    if $n_kept == 0;
d393 1
a393 1
		$n_kept, $n_tot - $n_kept, $file;
d420 14
a433 3
B<mbox-purge> [B<-before> I<date>] [B<-after> I<date>] [B<-pattern>
<pat>] [B<-head-pattern> I<pat>] [B<-body-pattern> I<pat>] [B<-eval>
<code>] I<file>...
d439 3
a441 3
(using B<lockfile> under the hood).  Because of this you have to have
write permission in the directory in which the I<file> being processed
is stored.
d448 31
a478 1
=head1 OPTIONS
d485 1
a485 1
=item B<-before> I<date>
d487 1
a487 1
Purge messages whose Date header specifies a time before I<date>.
d489 1
a489 1
=item B<-after> I<date>
d491 1
a491 1
Purge messages whose Date header specifies a time after I<date>.
d493 1
a493 1
=item B<-pattern> I<pat>
d499 1
a499 1
=item B<-head-pattern> I<pat>
d509 1
a509 1
=item B<-body-pattern> I<pat>
d515 1
a515 1
=item B<-eval> I<code>
d519 39
a557 3
to the head, body and full text of the message as its arguments.  The
head argument has had continuation lines undone, and the body in both of
the second arguments has had its mbox encoding unescaped.
d563 3
a565 2
    # Delete old messages from all your folders.
    mbox-purge -before 5/1/2000 ~/Mail/*
d568 2
a569 1
    mbox-purge -before 5/1/2000 -after 3/31/2000 file
d572 1
d574 1
a574 1
	-head-pattern '^Subject: (Re: )?GOOD LUCK TOTEM( \(fwd\))?$' \
d578 11
a588 1
    mbox-purge -eval 'length ${ $_[2] } > 1_000_000' file
d597 11
@


1.15
log
@Use Date::Parse if possible, else Date::GetDate.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.14 2002-05-06 10:43:59-04 roderick Exp roderick $
d88 3
a90 2
	my $out = localtime $t;
	debug "$in -> $t ($out)";
d198 1
a198 1
		    $time = getdate $date;
d200 1
a200 1
			or info "invalid date in message $n_tot of $file";
d215 1
a215 2
		    next unless defined $time;
    	    	    next if $time < $arg[0];
d218 1
a218 2
		    next unless defined $time;
    	    	    next if $time > $arg[0];
@


1.14
log
@s/perl5/perl/
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.13 2002-01-21 13:35:41-05 roderick Exp roderick $
a11 1
use Date::GetDate	qw(getdate);
d23 3
d65 30
d105 3
d112 2
a113 2
	    my $time = getdate $spec;
	    $time > 0 or xdie "invalid time `$spec'\n";
@


1.13
log
@Choke on -before/-after values in the future.

Support -- switch.
@
text
@d1 1
a1 1
#!/usr/bin/perl5 -w
d4 1
a4 1
# $Id: mbox-purge,v 1.12 2001-09-13 14:31:42-04 roderick Exp roderick $
@


1.12
log
@Rename RJS::Handy to RS::Handy.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.11 2001-07-27 12:24:39-04 roderick Exp roderick $
d70 4
a73 1
	if ($_ eq '-before' || $_ eq '-after') {
d79 6
@


1.11
log
@Don't use 2 digit years in the examples.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.10 2000-05-30 18:09:27-04 roderick Exp roderick $
d15 1
a15 1
use RJS::Handy		qw(:stat $Me xdie exclusive_create
d25 1
a25 1
    RJS::Handy::xwarn @@_;
@


1.10
log
@Split mbox_read() into mbox_read_head() and mbox_read_body().  Use these
to avoid keeping the body of a message which is definitely going to e
skipped in memory.

Don't call getdate() if not necessary.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.9 2000-05-04 12:41:35-04 roderick Exp roderick $
d326 1
a326 1
    mbox-purge -before 5/1/90 ~/Mail/*
d328 2
a329 2
    # Delete messages from April 1990.
    mbox-purge -before 5/1/90 -after 3/31/90 file
@


1.9
log
@Avoid undefined warning for bogus $date.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.8 1999-04-10 04:56:18-04 roderick Exp roderick $
d16 1
a16 1
			    mbox_read mbox_escape);
d64 1
a64 1
    my (@@rule);
d77 1
d134 2
a135 2
	while (my ($head, $body) = mbox_read \*FILE) {
	    my ($msg, $keep, $date, $time);
d137 4
a140 2
	    $msg = "$head\n$body";
	    $head =~ s/\n[ \t]+/ /g;
d142 1
d147 10
a156 7
	    $date = ($head =~ /^Date\s*:\s*(.*\S)/mi) ? $1 : undef;
	    defined $date or info "no date header in message $n_tot of $file";

	    if (defined $date) {
		$time = getdate $date;
		defined($time) && $time > 0
		    or info "invalid date in message $n_tot of $file";
d178 1
d185 1
d189 1
d200 5
a204 1
	    next unless $keep;
@


1.8
log
@Oops, use \"" not "" when calling the matching sub to test.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.7 1999-03-25 11:22:10-05 roderick Exp roderick $
d85 1
a85 1
	    eval { $sub->(\"") };
d146 5
a150 3
	    $time = defined($date) ? getdate $date : undef;
	    $time > 0 or info "invalid date in message $n_tot of $file"
		if defined $date;
@


1.7
log
@Add -eval.

Pass references instead of strings to the pattern closures.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.6 1997-12-01 13:43:59-05 roderick Exp roderick $
d85 1
a85 1
	    eval { $sub->("") };
@


1.6
log
@Oops, -pattern wasn't recognized.

Choke if no rules are specified.

Oops, the logic was only partially converted, it was using $skip but
$skip was no longer set anywhere.  Switch to $keep.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.5 1997-11-07 13:28:52-05 roderick Exp roderick $
d82 1
a82 1
	    my $sub = eval 'sub { $_[0] =~ /$pat/om }';
d92 11
d169 1
a169 1
    	    	    next if $arg[0]->($msg);
d172 1
a172 1
    	    	    next if $arg[0]->($head);
d175 4
a178 1
    	    	    next if $arg[0]->($body);
d243 3
a245 2
B<mbox-purge> [B<-before> I<date>] [B<-after> I<date>] [B<-pattern> <pat>]
[B<-head-pattern> I<pat>] [B<-body-pattern> I<pat>] I<file>...
d297 8
d319 3
@


1.5
log
@Add POD.

Add -pattern and -body-pattern, fix -head-pattern to validate and
compile.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.4 1997-10-13 12:05:11-04 roderick Exp roderick $
d78 1
a78 1
	elsif ($_ =~ /^-(head-|body-)pattern$/) {
d96 1
d123 1
a123 1
	    my ($msg, $skip, $date, $time);
d139 1
d143 6
d151 1
a151 1
    	    	    next unless $time < $arg[0];
d155 1
a155 1
    	    	    next unless $time > $arg[0];
d158 1
a158 1
    	    	    next unless $arg[0]->($msg);
d161 1
a161 1
    	    	    next unless $arg[0]->($head);
d164 1
a164 1
    	    	    next unless $arg[0]->($body);
d169 3
a171 1
		last if $skip;
d174 1
a174 1
	    next if $skip;
d248 1
a248 1
purge.
@


1.4
log
@Add -head-pattern.
@
text
@d4 1
a4 7
# $Id: mbox-purge,v 1.3 1997-07-18 15:13:39-04 roderick Exp roderick $

# This is an mbox purging program.  Simple usage is
#
#     mbox-purge -before 5/1/97 ~/Mail/*
#
# to delete all the messages in those folders before that date.
d6 3
a8 7
# It does do locking, but elm doesn't, so it's not safe if elm is
# running on the folders being processed.  It should be safe on a
# system mailbox.

# XXX You can't purge your mail spool as a regular user both because this
# script doesn't know to special case lockfile's -ml/-mu and because it
# tries to create the new file in the same directory as the current file.
d78 1
a78 1
	elsif ($_ eq '-head-pattern') {
d81 10
a90 2
	    # XXX validate
	    push @@rule, [$rule, shift @@ARGV];
a127 1
	    $skip = 0;
d143 1
a143 1
		    $skip = 1 if $time < $arg[0];
d147 4
a150 1
		    $skip = 1 if $time > $arg[0];
d153 4
a156 2
		    # XXX compile only once
		    $skip = 1 if $head =~ /$arg[0]/m;
d210 90
@


1.3
log
@getdate() returns -1, not undef, for error.

stat() the input file just before replacing it, balk if it was changed
since it was opened.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.2 1997-07-11 17:31:08-04 roderick Exp roderick $
d82 1
a82 1
	    @@ARGV or xdie "no arg for $_";
d85 1
a85 1
	    $time > 0 or xdie "invalid time `$spec'";
d88 6
d95 1
a95 1
	    xdie "invalid switch $_";
d151 4
@


1.2
log
@Re-work not to use MH, and finish up.
@
text
@d4 1
a4 1
# $Id: mbox-purge,v 1.1 1997-07-11 14:20:24-04 roderick Exp roderick $
d6 1
a6 1
# This is a mbox purging program.  Simple usage is
d13 1
a13 1
# running on the folders being processed..  It should be safe on a
d16 4
d85 1
a85 1
	    defined $time or xdie "invalid time `$spec'";
d95 1
a95 1
	my ($lock_file, @@stat, $new_file, $new_fh, $n_tot, $n_kept);
d108 2
d132 1
a132 1
	    defined $time or info "invalid date in message $n_tot of $file"
d158 13
a170 2
	close $new_fh		or xdie "error closing $new_file:";
	close FILE		or xdie "error closing $file:";
d172 4
@


1.1
log
@Initial revision
@
text
@a0 5
Yuck, this doesn't work because elm has an off by one error when creating
the content length header for messages it saves.  This is mostly but not
completely tested but I have to change implementations because of this
elm bug.

d4 5
a8 1
# $Id$
d10 5
a14 2
# XXX packmbox loses the delivery date, plus it doesn't use mboxrd
# XXX permissions
d18 5
a22 6
use RJS::Handy qw($Me xdie backtick_noshell dirents dirents_qualified plural
    	    	    exclusive_create);
use File::Path qw(mkpath rmtree);
use Proc::WaitStat qw(waitstat_die);
use String::ShellQuote qw(shell_quote);
use POSIX qw(:errno_h);
a25 1
my $Mh_dir	= undef;
d28 1
a28 1
my $Verbose	= 1;
d37 1
a37 1
    die "usage: $Me <pick args> -- <folder>...\n";
d48 2
a49 7
sub message_count {
    my $folder = shift;
    my $n = 0;
    for (dirents "$Mh_dir/$folder") {
	$n++ if /^\d+$/;
    }
    return $n;
d53 1
a53 1
    my ($orig, $type) = @@_;
d59 1
a59 5
    until (do {
		if ($type eq 'create') 	{ $fh = exclusive_create $new }
		elsif ($type eq 'link')	{ link $orig, $new }
		else { xdie "invalid create_tmp type $type\n" }
	    }) {
d66 1
a66 1
    return $new;
d70 1
a70 1
    my (@@pick);
a72 1
    #$ARGV[0] =~ /^-/ or usage "first pick arg doesn't start with dash\n";
d74 1
a74 1
    while (@@ARGV) {
d76 11
a86 36
	last if $_ eq '--';
	push @@pick, $_;
    }

    @@pick or usage "no pick args specified\n";
    @@ARGV or usage "no folder specified\n";

    my @@lock;
    for my $file (@@ARGV) {
	stat $file or xdie lstat($file)
				? "$file is a broken symlink\n"
				: "$file doesn't exist\n";
	-f _ or xdie "$file isn't a file\n";
	-r _ or xdie "$file isn't readable\n";
	push @@lock, "$file.lock";
    }
    info "locking ", 0+@@lock, " file", plural(0+@@lock);
    system 'lockfile', '-r', '3', @@lock;
    waitstat_die $?, 'lockfile';
    push @@Tmp, @@lock;

    $ENV{PATH} = "/usr/bin/mh:$ENV{PATH}";
    umask 066;

    $Mh_dir = backtick_noshell 'mktemp', '-p', $Me;
    waitstat_die $?, 'mktemp';
    chomp $Mh_dir;
    length($Mh_dir) or xdie "no data from mktemp\n";
    mkpath $Mh_dir;
    mkpath "$Mh_dir/tmp";

    $ENV{MH} = "$Mh_dir/profile";
    unless (open PROFILE, ">$ENV{MH}"
		and print PROFILE "Path: $Mh_dir\n"
		and close PROFILE) {
	xdie "error creating $ENV{MH}:";
d88 1
a89 3
    my ($folder, $dir);
    $folder = 'tmp';
    $dir = "$Mh_dir/$folder";
d91 1
a91 1
	info "processing $file";
d93 1
a93 14
    	my $in_file = create_tmp $file, 'link';
	system 'inc', '-silent', '-file', $in_file, "+$folder";
	waitstat_die $?, "inc for $file";
	unlink $in_file or xdie "error unlinking $in_file:";
	my $n_orig = message_count $folder;
	verbose "  $n_orig message", plural($n_orig);

	# @@#$(& pick doesn't provide a way to turn off the "no messages
	# match specification" message.  It's too much of a hassle to
	# ignore this error message but allow others (I need a backtick
	# function which returns stderr separate from stdout) so I'm
	# just tossing the error messages.
	my $cmd = '2>/dev/null pick -list ' . shell_quote(@@pick);
	my $list = `$cmd`;
d95 44
a138 30
	if ($list eq "0\n") {
	    verbose "  folder skipped, no messages match";
	}
	else {
	    waitstat_die $?, "pick for $file";
	    my @@list = split ' ', $list;

	    verbose "  ", 0+@@list, " message", plural(0+@@list), " to delete";
	    for (@@list) {
		/^\d+$/ or xdie "invalid output from pick for $folder: $_\n";
		unlink "$dir/$_" or xdie "can't unlink $dir/$_:";
	    }

	    # inc will always add a Return-Path header, even if one
	    # already exists.  So if you run a folder through this
	    # script it gets a new Return-Path each time.  Yuck.  Here
	    # I'm pruning duplicate Return-Path headers at the start of
	    # each message.  This is a yuck itself, of course, but the
	    # best idea I have at the moment.
	    verbose "  pruning extra Return-Path headers";
    	    for my $msg (grep { m-/\d+$- } dirents_qualified $dir) {
		open MSG, $msg or xdie "can't read $msg:";
		my $new_name = "$msg.tmp";
		my $new = exclusive_create $new_name
		    or xdie "can't create $new_name:";
		my %saw;
		while (<MSG>) {
		    next if /^Return-Path:\s*(.*?)\s*$/i && $saw{$1}++;
		    print $new $_ or xdie "error writing to $new_name:";
		    last if $_ eq "\n";
d140 2
a141 2
		while (<MSG>) {
		    print $new $_ or xdie "error writing to $new_name:";
d143 1
a143 4
		close MSG or xdie "error closing $msg:";
		close $new or xdie "error closing $new_name:";
		rename $new_name, $msg
		    or xdie "error renaming $new_name to $msg:";
d146 5
a150 4
	    verbose "  packing";
    	    my $pack_file = create_tmp $file, 'create';
	    system "packmbox +$folder > " . shell_quote($pack_file);
	    waitstat_die $?, "packmbox for $file";
d152 5
a156 3
	    rename $pack_file, $file
		or xdie "error renaming $pack_file to $file:";
	}
d158 2
a159 3
	for (dirents_qualified $dir) {
	    unlink or xdie "can't unlink $_:";
	}
a165 1
    rmtree $Mh_dir if defined $Mh_dir;
@
