head	1.7;
access;
symbols;
locks; strict;
comment	@# @;


1.7
date	2005.03.25.15.22.19;	author roderick;	state Exp;
branches;
next	1.6;

1.6
date	2002.01.21.07.16.26;	author roderick;	state Exp;
branches;
next	1.5;

1.5
date	2001.11.21.14.58.15;	author roderick;	state Exp;
branches;
next	1.4;

1.4
date	2001.09.19.15.49.43;	author roderick;	state Exp;
branches;
next	1.3;

1.3
date	2001.09.19.15.28.23;	author roderick;	state Exp;
branches;
next	1.2;

1.2
date	2001.07.20.13.00.17;	author roderick;	state Exp;
branches;
next	1.1;

1.1
date	2001.02.08.20.35.47;	author roderick;	state Exp;
branches;
next	;


desc
@@


1.7
log
@Add --input-record-separator.
@
text
@#!/usr/bin/perl -w
use strict;

# $Id: randline,v 1.6 2002-01-21 02:16:26-05 roderick Exp roderick $
#
# Roderick Schertler <roderick@@argon.org>

# Copyright (C) 2001 Roderick Schertler
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# For a copy of the GNU General Public License write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

use RS::Handy qw($Me dstr getopt xdie xsrand);

my $Exit	= 0;	# exit status; xwarn raises it to 1
my $N		= 1;	# how many records to pick (-n/--pick)
my $Force_filename = undef;	# undef: decide from the file count, 0: never, 1: always
my $Percent	= undef;	# --percent value; when defined it takes over from $N
my $Seed	= undef;	# --seed value; undef leaves the seeding to xsrand
my $Test	= 0;	# number of --test trials; 0 means a normal run
my $Version	= q$Revision: 1.6 $ =~ /(\d\S+)/ ? $1 : '?';	# pulled from the RCS keyword

my @@Option_spec = (
    'h|no-filename'	=> sub { $Force_filename = 0 },
    'H|with-filename'	=> sub { $Force_filename = 1 },
    'help!'		=> sub { usage() },
    'input-record-separator|irs=s' => \$/,	# stored straight into $/, so <> splits on it
    'percent|p=f'	=> \$Percent,
    'n|pick=i'		=> \$N,
    'seed=i'		=> \$Seed,
    'test=i'		=> \$Test,	# not listed in the usage text
    'version'		=> sub { print "$Me version $Version\n"; exit },
);

my $Usage = <<EOF;
usage: $Me [switch]...
switches:
  -h, --no-filename	never output file name
  -H, --with-filename	output filename no matter what
  --irs, --input-record-separator s
    	    	    	specify input record separator (default newline)
      --help		show this and then die
  -p, --percent P	pick P percent of lines rather than -n value
  -n, --pick N		pick N random lines (default $N)
      --seed S		use S as random number seed
      --version		show the version ($Version) and exit
Use \`perldoc $Me\' to see the full documentation.
EOF

sub xwarn {		# pass the message to RS::Handy::xwarn, then flag failure
    RS::Handy::xwarn(@@_);
    return $Exit ||= 1;	# an exit status that is already non-zero is kept
}

sub usage {		# report any complaint passed in, then die with $Usage
    if (@@_) { xwarn(@@_) }
    die $Usage;
}

sub init {		# parse the command line, then seed the random numbers
    if (@@ARGV) { getopt(-bundle, @@Option_spec) or usage() }
    # An explicit --seed is handed straight to srand so that runs can be
    # repeated; without one the seeding is left to xsrand.
    if (defined $Seed) {
	srand($Seed);
    }
    else { xsrand() }
}

sub run_one {		# returns a ref to a list of at most one [file, record]
    my @@pick;
    # Record number $. replaces the current choice with probability 1/$.
    while (<>) {
	next unless rand($.) < 1;
	@@pick = ([$ARGV, $_]);
    }
    return \@@pick;
}

# Thanks to mjd@@plover.com for the generalization to the $N > 1 case.

sub run_multi {		# returns a ref to a list of up to $N [file, record] pairs
    my @@pool;
    # Prime the pool with the first $N records.
    while (<>) {
	push @@pool, [$ARGV, $_];
	last unless @@pool < $N;
    }
    # Ran out of input early: stop here, since one more <> after EOF
    # would start reading stdin.
    return \@@pool unless @@pool >= $N;

    # Record number $. then joins with probability $N/$., evicting a
    # randomly chosen member; newcomers always go on the end.
    while (<>) {
	next if rand() >= $N / $.;
	splice(@@pool, int(rand @@pool), 1);
	push @@pool, [$ARGV, $_];
    }
    return \@@pool;
}

sub test {		# run $Test trials and print how often each record was picked
    my %tally;

    defined $Percent
	and xdie "--test not implemented for --percent\n";

    # Every trial drains @@ARGV through <>, so restore it each time.
    my @@files = @@ARGV;
    foreach my $trial (1 .. $Test) {
	@@ARGV = @@files;
	my $picked = $N > 1 ? run_multi() : run_one();
	$tally{ $_->[1] }++ foreach @@$picked;
    }

    # Least-picked first; ties are broken by the record text.
    my @@ordered = sort { $tally{$a} <=> $tally{$b} || $a cmp $b } keys %tally;
    foreach my $rec (@@ordered) {
	printf "%6d %s", $tally{$rec}, $rec;
    }
    return 0;
}

sub main {		# top-level driver; the return value is the exit status
    init;

    my $print_filename = defined $Force_filename
			    ? $Force_filename
			    : @@ARGV > 1;	# default: only with several input files

    if (defined $Percent) {	# --percent: every record is kept or dropped on its own
	if ($Percent < 0 || $Percent > 100) {
	    xdie "invalid --percent `$Percent'\n"
	}
	xdie "--test not implemented for --percent\n" if $Test;	# don't silently ignore --test
	return 0 if !$Percent;		# 0 percent selects nothing, so skip the read
	$Percent /= 100;
	while (<>) {
	    if (rand(1) < $Percent) {
		print "$ARGV:" if $print_filename;
		print $_;
	    }
	}
	return 0;
    }

    if ($N < 0) {
	xdie "invalid line count `$N'\n";
    }
    elsif ($N == 0) {		# nothing was asked for
	return 0;
    }

    if ($Test) {
	return test;
    }

    for my $hit (@@{ $N == 1 ? run_one : run_multi }) {
	my ($file, $line) = @@$hit;
	print "$file:" if $print_filename;
	print $line;
    }

    return 0;
}

$Exit = main() || $Exit;	# a failure flagged by xwarn survives a 0 from main
$Exit = 1 if $Exit and $Exit % 256 == 0;	# keep a non-zero multiple of 256 from looking like success
exit($Exit);

__END__

=head1 NAME

randline - print lines selected at random

=head1 SYNOPSIS

B<randline>
[B<-h> | B<--no-filename>]
[B<-H> | B<--with-filename>]
[B<--help>]
[B<--irs> | B<--input-record-separator> I<s>]
[B<-p> | B<--percent> I<pct>]
[B<-n> | B<--pick> I<number>]
[B<--seed> I<number>]
[B<--version>]
[I<file>]...

=head1 DESCRIPTION

B<randline> chooses lines at random from its input and outputs them.  By
default it picks one line from all the input files (or stdin if no files
are specified).  In all cases it does this using a single pass over the
input, without saving the whole thing in memory.

=head1 OPTIONS

=over 4

=item B<-h>, B<--no-filename>

Never prepend the input file name to lines output.  Normally it's output
when there's more than one input file.

=item B<-H>, B<--with-filename>

Always output the input file name.

=item B<--help>

Show the usage message and die.

=item B<--irs>, B<--input-record-separator> I<s>

Specify a different input record separator (really, terminator) (default
newline).

=item B<-p>, B<--percent> I<pct>

Choose I<pct> percent of the input lines, rather than a fixed number.

=item B<-n>, B<--pick> I<number>

Choose I<number> lines from the input rather than 1.

=item B<--seed> I<number>

Use I<number> as the seed for the random number generator.  Normally
you wouldn't want to specify this, instead you'd let the program pick
the seed.  The B<--seed> switch is useful to achieve repeatability for
debugging.  If you use the same seed and other inputs B<randline> will
pick the same lines to output.

=item B<--version>

Show the version number and exit.

=back

=head1 AUTHOR

Roderick Schertler <roderick@@argon.org>

=cut
@


1.6
log
@Choke on --percent > 100.
@
text
@d4 1
a4 1
# $Id: randline,v 1.5 2001-11-21 09:58:15-05 roderick Exp roderick $
d23 1
a23 1
use RS::Handy qw($Me getopt xdie xsrand);
d31 1
a31 1
my $Version	= q$Revision: 1.5 $ =~ /(\d\S+)/ ? $1 : '?';
d37 1
d50 2
d194 1
d224 5
@


1.5
log
@Add --seed.
@
text
@d4 1
a4 1
# $Id: randline,v 1.4 2001-09-19 11:49:43-04 roderick Exp roderick $
d31 1
a31 1
my $Version	= q$Revision: 1.4 $ =~ /(\d\S+)/ ? $1 : '?';
d140 3
a142 1
	xdie "invalid --percent `$Percent'\n" if $Percent < 0;
@


1.4
log
@Comment.
@
text
@d4 1
a4 1
# $Id: randline,v 1.3 2001-09-19 11:28:23-04 roderick Exp roderick $
d29 1
d31 1
a31 1
my $Version	= q$Revision: 1.3 $ =~ /(\d\S+)/ ? $1 : '?';
d37 1
d39 1
a39 1
    'percent|p=f'	=> \$Percent,
d50 1
d52 1
a52 1
  -p, --percent P	pick P percent of lines rather than -n value
d69 6
a74 1
    xsrand;
d189 1
d191 1
a191 1
[B<-p> | B<--percent> I<pct>]
d219 4
d227 1
a227 1
=item B<-p>, B<--percent> I<pct>
d229 5
a233 1
Choose I<pct> percent of the input lines, rather than a fixed number.
@


1.3
log
@Add POD, copyright.

Add --test.

Add one-pass multi-file handling.
@
text
@d4 1
a4 1
# $Id: randline,v 1.2 2001-07-20 09:00:17-04 roderick Exp roderick $
d30 1
a30 1
my $Version	= q$Revision: 1.2 $ =~ /(\d\S+)/ ? $1 : '?';
d190 2
a191 1
are specified).
@


1.2
log
@Add --percent.
@
text
@d1 1
a1 1
#!/usr/bin/perl5 -w
d4 18
a21 1
# $Id: randline,v 1.1 2001-02-08 15:35:47-05 roderick Exp roderick $
d23 1
a23 1
use RJS::Handy qw($Me getopt xdie xsrand);
a24 1
my $Debug	= 0;
d27 1
a27 1
my $No_filename	= 0;
d29 2
a30 1
my $Version	= q$Revision: 1.1 $ =~ /(\d\S+)/ ? $1 : '?';
d33 2
a34 1
    'debug!'		=> \$Debug,
d36 1
a36 3
    'h|no-filename'	=> \$No_filename,
    'H|with-filename'	=> sub { $No_filename = 0 },
    'n=i'		=> \$N,
d38 1
a44 1
      --debug		turn debugging on
d48 1
a48 1
  -n N			pick N random lines (default $N)
d55 1
a55 1
    RJS::Handy::xwarn @@_;
a58 4
sub debug {
    print STDERR "debug: ", @@_, "\n" if $Debug;
}

d69 55
a124 2
    my (@@hit);

d127 3
a129 1
    my $print_filename = @@ARGV > 1 && !$No_filename;
d152 2
a153 4
    elsif ($N == 1) {
    	while (<>) {
	    $hit[0] = [$., $ARGV, $_] if rand($.) < 1;
	}
d156 2
a157 16
    else {
	# XXX You should be able to adapt the `rand($.) < 1' algorithm
	# to this.

	my @@in;
	while (<>) {
	    push @@in, [$., $ARGV, $_];
	}

	while (@@in && @@hit < $N) {
	    push @@hit, splice @@in, int(rand @@in), 1;
	}
    }

    for my $hit (sort { $a->[0] <=> $b->[0] } @@hit) {
	my ($line_num, $file, $line) = @@$hit;
d177 8
a184 1
B<prog> [I<switch>]...
d188 3
a190 1
B<prog> does foo
d196 1
a196 1
=item B<--debug>
d198 6
a203 1
Turn debugging on.
d209 8
d222 4
@


1.1
log
@Initial revision
@
text
@d4 1
a4 1
# $Id$
d12 2
a13 1
my $Version	= q$Revision$ =~ /(\d\S+)/ ? $1 : '?';
d21 1
d33 1
d63 13
@
