#!/usr/bin/perl -w # $Id: mail2news,v 1.17 2002-06-18 11:35:51-04 roderick Exp $ # # Copyright (c) 1997 Roderick Schertler. All rights reserved. This # program is free software; you can redistribute it and/or modify it # under the same terms as Perl itself. # This is a mail to news converter and injector. It uses In-Reply-To to # construct a missing References header. It isn't really ready for # prime time in that you have to configure the local domain name and you # have to tell it the inews/rnews program and switches. # # Three things to be aware of: # # - The Distribution is set to local, be careful not to leak it to # your neighbors. # # - A Newsgroups header present in the message is retained (newsgroups # specified on the command line are added to it). Actually, if # there's a Newsgroups line present you aren't required to specify # any newsgroups on the command line. # # - All articles are Approved, read the comments for why. # # The way I use this script it to subscribe l-foo-list to the foo-list # and then to put # # l-foo-list: "|/usr/local/lbin/mail2news mail.foo-list" # owner-l-foo-list: postmaster # # in /etc/aliases. # # Roderick Schertler # Local configuration section ------------------------------------------------- # If $is_inews is true then I'm piping to inews, and I need to drop Path # and so on. If $is_inews is false then I'm piping to rnews and I need # to ensure I have a Path and so on. $is_inews = 0; $fallback_hostless_domain = 'ibcinc.com'; $use_hostless_mail_domain = 1; if ($is_inews) { # These are for INN's inews, the switches take care of .signature # and Organizaiton. For C news' inews you have to deal with those. # XXX -O doesn't actually prevent an Organization being tacked on. $inews_name = 'inews'; $inews_opts = '-RSOh'; } else { $inews_name = 'rnews'; $inews_opts = ''; } # End of configuration section ------------------------------------------------ $debug = (@ARGV && $ARGV[0] eq '-d') ? shift : 0; ($Me = $0) =~ s-.*/--; $full_domain = $mail_domain = ''; sub cache_domain { unless ($full_domain) { local ($hostname); chop ($hostname = `hostname`); if (!$hostname) { $full_domain = $fallback_hostless_domain; } elsif ($hostname =~ /\./) { $full_domain = $hostname; } else { $full_domain = "$hostname.$fallback_hostless_domain"; } if ($use_hostless_mail_domain && $full_domain =~ /^[^.]+\.(.*)/) { $mail_domain = $1; } else { $mail_domain = $full_domain; } } } sub full_domain { &cache_domain; $full_domain } sub mail_domain { &cache_domain; $mail_domain } # Even though I don't have to sorry about .signature I'm setting $HOME, # to fend against no specific thing but perhaps something analagous # dead.letter. $ENV{HOME} = '/nonesuch'; # Headers always to add. @add = ( "Distribution: local\n", # If a message comes in which already has a Newsgroups line, and if # it's posted to any moderated newsgroups, INN will send it off to # the moderator to be approved (the Distribution header doesn't # apply in this case). I'm simply approving all posts which are # made by this script to avoid this problem. I'm counting on the # Distribution from letting falsely approved articles escape # offsite, gods curse you if you later misconfigure a Distribution # leak into our news setup. "Approved: $Me \n", ); @lose = ( 'Delivered-To', 'Distribution', 'Received', # I'm going to approve all postings myself 'Approved', # inews won't allow 'Xref', 'Date-Received', 'Received', 'Posted', 'Posting-Version', 'Relay-Version', ); $lose_re = '^(' . join ('|', @lose) . '):'; @lose_if_inews = ( 'Path', 'NNTP-Posting-Host', ); $lose_if_inews_re = '^(' . join ('|', @lose_if_inews) . '):'; @drop_dup = ( 'Mime-Version', 'Content-Transfer-Encoding', 'Sender', ); $drop_dup_re = '^(' . join ('|', @drop_dup) . '):'; $EX_UNAVAILABLE = 69; $EX_TEMPFAIL = 75; sub ex_unavailable { warn @_ if @_; exit $EX_UNAVAILABLE; } sub ex_tempfail { warn @_ if @_; exit $EX_TEMPFAIL; } sub find_prog { local ($prog) = @_; local (@dir) = (split (/:/, $ENV{PATH}, length($ENV{PATH})), "/usr/lib/news", "/usr/local/news", "/news"); local ($path); for (@dir) { $path = ($_ ne '' ? $_ : '.') . "/$prog"; return $path if -f $path && -x _; } undef } @ARGV > 1 && &ex_unavailable("usage: $Me [newsgroup]\n", "Args were:\n\t", join("\n\t", @ARGV), "\n"); $newsgroup = $ARGV[0] if @ARGV == 1; $inews = &find_prog($inews_name); defined ($inews) || &ex_unavailable("$Me: can't find inews\n"); if ($debug) { $inews = 'cat'; $inews_opts = ''; } $tee = "/tmp/mail2news." . time . ".$$"; open (TEE, ">$tee") || ($tee = 0); $plumber = ''; $SIG{'PIPE'} = "plumber"; sub plumber { &ex_tempfail("$Me: inews ($inews) died prematurely\n"); } sub gen_message_id { "<$Me." . time . ".$$@" . &full_domain . '>' } sub read_stdin { local ($line) = scalar (); $line = '' unless defined $line; print TEE $line if $tee; $line; } undef $lookahead; sub get_line { unless (defined $lookahead) { $lookahead = &read_stdin;; } local ($_) = $lookahead; if ($_ eq "\n") { undef $lookahead; return $_; } line: while (($lookahead = &read_stdin) ne '') { if ($lookahead =~ /^[ \t]/) { $_ .= $lookahead; } else { last line; } } $_; } sub fix_up_msgids { local ($_) = @_; # Replace illegal characters in the message id. I think RFC 1036 is # actually more strict than this, but INN isn't. local ($replace_chars) = pack ('C*', 0..32); 1 while s/(<[^$replace_chars>\@]*)([$replace_chars])/ sprintf ('%s=%02X', $1, ord ($2))/oe; # I often see mail message ids with a trailing . after the domain. # I'm not sure if that's legal in mail or not. 1 while s/<([^>]+)\.>/<$1>/g; $_; } open (INEWS, "| $inews $inews_opts") || &ex_tempfail("$Me: can't fork: $!\n"); $saw_subject = 0; $saw_message_id = 0; $saw_references = 0; $saw_path = 0; $irt_msgid = ''; # (possible) Message-ID from In-Reply-To @header = (); %dup = (); while (do { $_ = &get_line; $_ ne '' && $_ ne "\n" }) { # INN will choke on headers without a trailing space. Perhaps I # should ditch them instead of adding the space. s/^([^:\s]+):$/$1: /; next if /$lose_re/io; next if $is_inews && /$lose_if_inews_re/io; next unless /^([a-z\d]+[a-z\d-]*):/i; $header = "\L$1"; if (/$drop_dup_re/io) { next if defined $dup{$header}; $dup{$header} = 1; } if ($header eq 'subject') { $saw_subject = 1; } elsif ($header eq 'message-id') { $_ = &fix_up_msgids ($_); $saw_message_id = 1; } elsif ($header eq 'newsgroups') { $saw_newsgroup = 1; # If a newsgroup was given on the command line and it doesn't # appear on the list in the article, add it. if ($newsgroup) { chop; local ($quoted); foreach $group (split (/\s*,\s*/, $newsgroup)) { ($quoted = $group) =~ s/(\W)/\\$1/g; unless ("$_ " =~ /[,\s]$quoted[,\s]/) { $_ .= ",$group"; } } $_ .= "\n"; $newsgroup = ''; } } elsif ($header eq 'references') { $_ = &fix_up_msgids ($_); $saw_references = 1; } elsif ($header eq 'in-reply-to' && /(<[^>\@]+\@[^>\@]+>)/) { # One problem with this approach is that it picks up an email # address in In-Reply-To. Perhaps I should add a couple of # heuristics (eg, don't use it if it doesn't have any non-digits # in the local part). In practice almost nobody seems to put # mail addresses in In-Reply-To. $irt_msgid = &fix_up_msgids ($1); } elsif ($header eq 'path') { $saw_path = 1; } push (@header, $_); } &ex_unavailable("$Me: didn't get newsgroup from either headers" . " or the command line\n") unless $newsgroup || $saw_newsgroup; print INEWS @header; print INEWS "Path: not-for-mail\n" if !$is_inews && !$saw_path; print INEWS "Newsgroups: $newsgroup\n" if $newsgroup && !$saw_newsgroup; print INEWS "Subject: (no subject)\n" unless $saw_subject; print INEWS "Message-ID: ", &gen_message_id, "\n" unless $saw_message_id; print INEWS "References: $irt_msgid\n" if $irt_msgid && !$saw_references; print INEWS @add if @add; print INEWS "\n"; print INEWS $lookahead if defined $lookahead && $lookahead ne ''; print INEWS while $_ = &read_stdin; close INEWS; unlink $tee if $tee && $? == 0; &ex_unavailable("$Me: return $? from $inews_name\n") if $?; exit;