#!/usr/bin/perl -w

# $Id: mail2news,v 1.17 2002-06-18 11:35:51-04 roderick Exp $
#
# Copyright (c) 1997 Roderick Schertler.  All rights reserved.  This
# program is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself.

# This is a mail to news converter and injector.  It uses In-Reply-To to
# construct a missing References header.  It isn't really ready for
# prime time in that you have to configure the local domain name and you
# have to tell it the inews/rnews program and switches.
#
# Three things to be aware of:
#
#   - The Distribution is set to local, be careful not to leak it to
#     your neighbors.
#
#   - A Newsgroups header present in the message is retained (newsgroups
#     specified on the command line are added to it).  Actually, if
#     there's a Newsgroups line present you aren't required to specify
#     any newsgroups on the command line.
#
#   - All articles are Approved, read the comments for why.
#
# The way I use this script it to subscribe l-foo-list to the foo-list
# and then to put
#
#     l-foo-list:        "|/usr/local/lbin/mail2news mail.foo-list"
#     owner-l-foo-list:  postmaster
#
# in /etc/aliases.
#
# Roderick Schertler <roderick@argon.org>

# Local configuration section -------------------------------------------------

# If $is_inews is true then I'm piping to inews, and I need to drop Path
# and so on.  If $is_inews is false then I'm piping to rnews and I need
# to ensure I have a Path and so on.
$is_inews = 0;

$fallback_hostless_domain = 'ibcinc.com';
$use_hostless_mail_domain = 1;

if ($is_inews) {
    # These are for INN's inews, the switches take care of .signature
    # and Organizaiton.  For C news' inews you have to deal with those.
    # XXX -O doesn't actually prevent an Organization being tacked on.
    $inews_name = 'inews';
    $inews_opts = '-RSOh';
}
else {
    $inews_name = 'rnews';
    $inews_opts = '';
}

# End of configuration section ------------------------------------------------

$debug = (@ARGV && $ARGV[0] eq '-d') ? shift : 0;
($Me = $0) =~ s-.*/--;

$full_domain = $mail_domain = '';
sub cache_domain {
    unless ($full_domain) {
	local ($hostname);
	chop ($hostname = `hostname`);
	if (!$hostname) {
	    $full_domain = $fallback_hostless_domain;
	}
	elsif ($hostname =~ /\./) {
	    $full_domain = $hostname;
	}
	else {
	    $full_domain = "$hostname.$fallback_hostless_domain";
	}
	if ($use_hostless_mail_domain && $full_domain =~ /^[^.]+\.(.*)/) {
	    $mail_domain = $1;
	}
	else {
	    $mail_domain = $full_domain;
	}
    }
}

sub full_domain {
    &cache_domain;
    $full_domain
}

sub mail_domain {
    &cache_domain;
    $mail_domain
}

# Even though I don't have to sorry about .signature I'm setting $HOME,
# to fend against no specific thing but perhaps something analagous
# dead.letter.
$ENV{HOME} = '/nonesuch';

# Headers always to add.
@add = (
    "Distribution: local\n",

    # If a message comes in which already has a Newsgroups line, and if
    # it's posted to any moderated newsgroups, INN will send it off to
    # the moderator to be approved (the Distribution header doesn't
    # apply in this case).  I'm simply approving all posts which are
    # made by this script to avoid this problem.  I'm counting on the
    # Distribution from letting falsely approved articles escape
    # offsite, gods curse you if you later misconfigure a Distribution
    # leak into our news setup.
    "Approved: $Me <usenet@" . &mail_domain . ">\n",
);

@lose = (
    'Delivered-To',
    'Distribution',
    'Received',

    # I'm going to approve all postings myself
    'Approved',

    # inews won't allow
    'Xref',
    'Date-Received',
    'Received',
    'Posted',
    'Posting-Version',
    'Relay-Version',
);
$lose_re = '^(' . join ('|', @lose) . '):';

@lose_if_inews = (
    'Path',
    'NNTP-Posting-Host',
);
$lose_if_inews_re = '^(' . join ('|', @lose_if_inews) . '):';

@drop_dup = (
    'Mime-Version',
    'Content-Transfer-Encoding',
    'Sender',
);
$drop_dup_re = '^(' . join ('|', @drop_dup) . '):';

$EX_UNAVAILABLE	= 69;
$EX_TEMPFAIL	= 75;

sub ex_unavailable {
    warn @_ if @_;
    exit $EX_UNAVAILABLE;
}
sub ex_tempfail {
    warn @_ if @_;
    exit $EX_TEMPFAIL;
}

sub find_prog {
    local ($prog) = @_;
    local (@dir) = (split (/:/, $ENV{PATH}, length($ENV{PATH})),
		    "/usr/lib/news", "/usr/local/news", "/news");
    local ($path);
    for (@dir) {
	$path = ($_ ne '' ? $_ : '.') . "/$prog";
	return $path if -f $path && -x _;
    }
    undef
}

@ARGV > 1 && &ex_unavailable("usage: $Me [newsgroup]\n",
			     "Args were:\n\t", join("\n\t", @ARGV), "\n");
$newsgroup = $ARGV[0] if @ARGV == 1;

$inews = &find_prog($inews_name);
defined ($inews) || &ex_unavailable("$Me: can't find inews\n");
if ($debug) {
    $inews = 'cat';
    $inews_opts = '';
}

$tee = "/tmp/mail2news." . time . ".$$";
open (TEE, ">$tee") || ($tee = 0);

$plumber = '';
$SIG{'PIPE'} = "plumber";
sub plumber { &ex_tempfail("$Me: inews ($inews) died prematurely\n"); }

sub gen_message_id {
    "<$Me." . time . ".$$@" . &full_domain . '>'
}

sub read_stdin {
    local ($line) = scalar (<STDIN>);
    $line = '' unless defined $line;
    print TEE $line if $tee;
    $line;
}

undef $lookahead;
sub get_line {
    unless (defined $lookahead) {
	$lookahead = &read_stdin;;
    }
    local ($_) = $lookahead;
    if ($_ eq "\n") {
	undef $lookahead;
	return $_;
    }
  line:
    while (($lookahead = &read_stdin) ne '') {
	if ($lookahead =~ /^[ \t]/) {
	    $_ .= $lookahead;
	} else {
	    last line;
	}
    }
    $_;
}

sub fix_up_msgids {
    local ($_) = @_;

    # Replace illegal characters in the message id.  I think RFC 1036 is
    # actually more strict than this, but INN isn't.
    local ($replace_chars) = pack ('C*', 0..32);
    1 while s/(<[^$replace_chars>\@]*)([$replace_chars])/
	      sprintf ('%s=%02X', $1, ord ($2))/oe;

    # I often see mail message ids with a trailing . after the domain.
    # I'm not sure if that's legal in mail or not.
    1 while s/<([^>]+)\.>/<$1>/g;

    $_;
}

open (INEWS, "| $inews $inews_opts") || &ex_tempfail("$Me: can't fork: $!\n");

$saw_subject = 0;
$saw_message_id = 0;
$saw_references = 0;
$saw_path = 0;
$irt_msgid = '';	# (possible) Message-ID from In-Reply-To
@header = ();
%dup = ();

while (do { $_ = &get_line; $_ ne '' && $_ ne "\n" }) {

    # INN will choke on headers without a trailing space.  Perhaps I
    # should ditch them instead of adding the space.
    s/^([^:\s]+):$/$1: /;

    next if /$lose_re/io;
    next if $is_inews && /$lose_if_inews_re/io;
    next unless /^([a-z\d]+[a-z\d-]*):/i;
    $header = "\L$1";
    if (/$drop_dup_re/io) {
	next if defined $dup{$header};
	$dup{$header} = 1;
    }
    if ($header eq 'subject') {
	$saw_subject = 1;
    } elsif ($header eq 'message-id') {
	$_ = &fix_up_msgids ($_);
    	$saw_message_id = 1;
    } elsif ($header eq 'newsgroups') {
    	$saw_newsgroup = 1;
    	# If a newsgroup was given on the command line and it doesn't
    	# appear on the list in the article, add it.
	if ($newsgroup) {
	    chop;
	    local ($quoted);
	    foreach $group (split (/\s*,\s*/, $newsgroup)) {
		($quoted = $group) =~ s/(\W)/\\$1/g;
	    	unless ("$_ " =~ /[,\s]$quoted[,\s]/) {
		    $_ .= ",$group";
		}
	    }
	    $_ .= "\n";
	    $newsgroup = '';
	}
    } elsif ($header eq 'references') {
	$_ = &fix_up_msgids ($_);
    	$saw_references = 1;
    } elsif ($header eq 'in-reply-to' && /(<[^>\@]+\@[^>\@]+>)/) {
	# One problem with this approach is that it picks up an email
	# address in In-Reply-To.  Perhaps I should add a couple of
	# heuristics (eg, don't use it if it doesn't have any non-digits
	# in the local part).  In practice almost nobody seems to put
	# mail addresses in In-Reply-To.
	$irt_msgid = &fix_up_msgids ($1);
    }
    elsif ($header eq 'path') {
	$saw_path = 1;
    }
    push (@header, $_);
}

&ex_unavailable("$Me: didn't get newsgroup from either headers"
		. " or the command line\n")
    unless $newsgroup || $saw_newsgroup;

print INEWS @header;
print INEWS "Path: not-for-mail\n" if !$is_inews && !$saw_path;
print INEWS "Newsgroups: $newsgroup\n" if $newsgroup && !$saw_newsgroup;
print INEWS "Subject: (no subject)\n" unless $saw_subject;
print INEWS "Message-ID: ", &gen_message_id, "\n" unless $saw_message_id;
print INEWS "References: $irt_msgid\n" if $irt_msgid && !$saw_references;
print INEWS @add if @add;
print INEWS "\n";

print INEWS $lookahead if defined $lookahead && $lookahead ne '';
print INEWS while $_ = &read_stdin;

close INEWS;
unlink $tee if $tee && $? == 0;
&ex_unavailable("$Me: return $? from $inews_name\n") if $?;
exit;
