#!/usr/bin/perl -w
use strict;

# $Id: books-gen,v 1.17 2018-08-19 11:02:57-04 roderick Exp $
#
# Roderick Schertler <roderick@argon.org>

# Copyright (C) 1999 Roderick Schertler
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# For a copy of the GNU General Public License write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# This program converts my text-file database of books read to HTML
# pages.  Input data (read with <>) is in the form
#
#    start-date end-date <rating + 5> <type> <copyright> \
#    	 <title><tab><author>[<tab><author>]...
#
# eg
#
#    -        19891228 - n ---- The Golden Apple		Shea R.		Wilson R. A.
#    -        19920516 2 n 1972 A Transatlantic Tunnel Hurrah!		Harrison Harry
#    19950717 19950726 7 n 1974 The Mote in God's Eye	Niven Larry	Pournelle Jerry
#    19990915 -        8 n 1965 Dune			Herbert Frank
#
# The fields up until the title are whitespace-separated, the rest have
# to be separated with tabs (since they can contain spaces).  Blank
# lines and comments are skipped.

# Maps the one-letter <type> code used in the input file to the name
# shown in the "Type" column of the generated pages.  parse() rejects
# any code which is not a key here, and table() does the translation.
# Note that the by-type page in main() sorts on the one-letter code, not
# on the displayed name.
my %Type = (
    b => 'bio',
    h => 'humor',
    x => 'nonfic',

    g => 'comic',
    n => 'novel',
    s => 'shortfic',

    o => 'other',
);

# Rating translation used by parse().  A single-digit rating in the
# input has the '' entry added to it (so 5 becomes 0, an average read);
# any other key found here replaces the rating with its numeric value.
# Both special values are below every rescaled digit (-5 .. 4), so they
# end up last on the by-rating page.  The page text describes "a" as
# an abandoned book; '-' presumably means "not rated" -- confirm with
# the data file.
my %Rating = (
    ''  => -5,	# amount applied to rescale regular ratings
    'a' => -6,
    '-'	=> -7,
);

use RS::Handy qw(data_dump inverse_hash xdie);

# $Debug is not referenced anywhere else in this file.
my $Debug	= 0;
# Exit status for the program; xwarn() bumps it to 1 and the last lines
# of the script combine it with main()'s return value.
my $Exit	= 0;
# Reverse of %Rating as produced by RS::Handy's inverse_hash (presumably
# numeric value => input symbol); table() uses it to turn the internal
# numbers for the special ratings back into their symbols.
my %Rating_rev	= inverse_hash %Rating;

# Page suffixes, in the order the columns appear in the table.  Each
# suffix names one output file (see page_file()) holding the table
# sorted by that column; '' is the default page.
my @Head	= ('-s', '', '-r', '-y', '-d', '-t', '-a');
# Column heading shown for each page suffix.
my %Link	= (
    '-s' => 'Started',
      '' => 'Finished',
    '-r' => 'Rating',
    '-y' => 'Type',
    '-d' => 'Copyright',
    '-t' => 'Title',
    '-a' => 'Author',
);

# Positions of the fields inside a book record, i.e. one of the array
# refs built by parse().  F_AUTHOR holds a reference to the list of
# authors, every other field is a plain scalar.
use constant {
    F_START     => 0,
    F_END       => 1,
    F_RATING    => 2,
    F_TYPE      => 3,
    F_COPYRIGHT => 4,
    F_TITLE     => 5,
    F_AUTHOR    => 6,
};

# Issue a warning through RS::Handy::xwarn (arguments are passed along
# untouched) and make sure the program will exit with a failure status.
# An already non-zero $Exit is left alone.  Returns the resulting $Exit.
sub xwarn {
    RS::Handy::xwarn(@_);
    $Exit = 1 unless $Exit;
    return $Exit;
}

# Print the fixed opening of every generated page -- document head, the
# link back to the index and the explanation of the Rating and Copyright
# columns -- to the currently selected filehandle.
sub preamble {
    # The here-doc is single-quoted, so the text is emitted verbatim and
    # the mail address needs no escaping.
    print <<'EOF';
<html><head><title>Books I've Read</title></head>
<body>

<p><a href="./">Back to index</a>

<p>Here's a list of most of the non-technical books I've read since
1990, with significant gaps between 2013 and 2018.

<dl>

    <dt> Rating

    <dd> 0 is an average enjoyable read, positive is better than average
    and negative is worse.  "a" (for abandoned) is a book I chose not
    to finish.  I didn't start out with strict guidelines for the scale,
    but I've never gone over +3 or under -3.  I have a lot more positive
    ratings than negative, I've presumed this is because I'm unlikely to
    pick up a book which I don't think I'll like.

    <p> If you largely agree with my rankings and you know a +2 or better
    which isn't on my list, <a href="mailto:roderick@argon.org">tell me
    about it</a>!

    <dt> Copyright

    <dd> I'd prefer to track when the book was written, but I don't have
    a good way to do that.  I settled on tracking copyright dates in
    most cases.

</dl>

EOF
}

# Print the fixed closing of every generated page -- a pointer to this
# script, the author's contact link, the link back to the index and the
# closing tags -- to the currently selected filehandle.
sub postamble {
    # Single-quoted here-doc: emitted verbatim, no escaping required.
    print <<'EOF';
<p> These HTML pages are generated from a text file using my <a
href="books-gen">books-gen script</a>.

<p> <a href="mailto:roderick@argon.org">Roderick Schertler
&lt;roderick@argon.org&gt;</a>
<p><a href="./">Back to index</a>
</body></html>
EOF
}

# Convert one date field from the input.  "-" (no date) becomes the
# empty string and YYYYMMDD becomes YYYY-MM-DD; anything else is fatal.
# $label ("start" or "end") only appears in the error message.
sub _iso_date {
    my ($label, $raw) = @_;

    return '' if $raw eq '-';

    my ($year, $month, $day) = $raw =~ /^(\d\d\d\d)(\d\d)(\d\d)$/
        or xdie("invalid $label `$raw'");
    return "$year-$month-$day";
}

# Read the book database from <> and return a list of book records.
#
# Each record is an array ref indexed by the F_* constants:
#
#     start      "YYYY-MM-DD", or '' when the input had "-"
#     end        likewise
#     rating     number: a digit rescaled by $Rating{''}, or the %Rating
#                value of a special symbol
#     type       the one-letter code, validated against %Type
#     copyright  four-digit year, or '-1' when the input was all dashes
#     title      string
#     authors    reference to a list with at least one name
#
# Comment lines and blank lines are skipped.  Any malformed field is
# reported through xdie.
sub parse {
    my @book;

    while (my $line = <>) {
        next if $line =~ /^\s*#/;       # comment line
        next if $line =~ /^\s*$/;       # blank line
        chomp $line;

        # The first five fields are whitespace-separated; the remainder
        # is the title and the authors, separated by tabs.
        my ($start, $end, $rating, $type, $copyright, $rest)
            = split ' ', $line, 6;
        my ($title, @author) = split /\t+/, $rest;
        xdie("no author") unless defined $author[0];

        $start = _iso_date('start', $start);
        $end   = _iso_date('end',   $end);

        # Special symbols are looked up first, plain digits are rescaled.
        if (defined $Rating{$rating}) {
            $rating = $Rating{$rating};
        }
        elsif ($rating =~ /^\d$/) {
            $rating = $rating + $Rating{''};
        }
        else {
            xdie("invalid rating `$rating'");
        }

        xdie("invalid type `$type'") unless defined $Type{$type};

        # A run of dashes means the copyright year is unknown; it is
        # stored as -1 so the field still sorts numerically.
        if ($copyright =~ /^-+$/) {
            $copyright = '-1';
        }
        elsif ($copyright !~ /^\d{4}$/) {
            xdie("invalid copyright `$copyright'");
        }

        push @book,
            [$start, $end, $rating, $type, $copyright, $title, \@author];
    }

    return @book;
}

# Return the name of the HTML file for the page with the given suffix
# ('' for the default page, '-s', '-r', ... for the others).
sub page_file {
    my $suffix = shift;

    return join '', 'books', $suffix, '.html';
}

# Print the table's heading row to the currently selected filehandle.
#
# $type is the suffix of the page being generated.  Its own column gets
# a plain heading, every other column heading links to the page sorted
# by that column.  No newline is written after the row.  Dies via xdie
# if $type is not one of the suffixes in @Head.
sub header {
    my ($type) = @_;
    my $matched = 0;

    my $row = '<tr>';
    for my $suffix (@Head) {
        if ($suffix eq $type) {
            $row .= " <th>$Link{$suffix}";
            $matched = 1;
        }
        else {
            $row .= ' <th><a href="' . page_file($suffix)
                  . "\">$Link{$suffix}</a>";
        }
    }
    print $row;

    $matched or xdie("invalid type `$type'\n");
}

# Print the HTML table for one page to the currently selected
# filehandle.
#
# $type is the page suffix (it selects which heading is not a link, see
# header()); @book is the list of book records, already in the order
# they should be shown.  The heading row is repeated at the bottom of
# the table.
sub table {
    my ($type, @book) = @_;

    print "<p><table border=1>\n";
    header($type);

    for my $record (@book) {
        my ($start, $end, $rating, $kind, $copyright, $title, $rauthor)
            = @$record;

        # Empty cells get a non-breaking space so they still render.
        $start = '&nbsp;' if $start eq '';
        $end   = '&nbsp;' if $end   eq '';

        # Show the special ratings by their input symbol.  %Rating also
        # carries the rescaling offset under the key '', which
        # presumably ends up in %Rating_rev as offset => ''; without
        # the emptiness check a book whose rescaled rating happens to
        # equal the offset would be shown with a blank rating.
        my $symbol = $Rating_rev{$rating};
        $rating = $symbol if defined($symbol) && $symbol ne '';

        $kind      = $Type{$kind};
        $copyright = '&nbsp;' if $copyright == -1;  # unknown year
        my $author = join '<br>', @$rauthor;

        print "<tr>",
              " <td>$start",
              " <td>$end",
              " <td align=right>$rating",
              " <td>$kind",
              " <td align=center>$copyright",
              " <td>$title <td>$author\n";
    }

    header($type);
    print "</table>\n";
}

# Generate one HTML page.
#
# $type is the page suffix, which determines the output file name (see
# page_file()); @book is the list of book records in display order.
# The file is created in the current directory, replacing any existing
# file, and a progress line is printed to the handle selected on entry.
# Dies via xdie if the file can't be created or closed.
sub page {
    my ($type, @book) = @_;

    my $file = page_file($type);
    print "generating $file\n";

    # Three-argument open with a lexical handle: the file name can never
    # be mistaken for part of the mode, and the handle can't collide
    # with a global one.  (The trailing colon in the messages presumably
    # makes xdie append the system error -- confirm in RS::Handy.)
    open my $fh, '>', $file or xdie("can't create $file:");

    # The page is written by plain print calls in the helpers, so make
    # the new file the default output handle while they run.
    my $old_fh = select $fh;

    preamble();
    table($type, @book);
    postamble();

    # Put the previous default handle back before checking the close, so
    # it is restored even when the close fails.  Buffered write errors
    # show up at close time, so its result has to be checked.
    select $old_fh;
    close $fh or xdie("error closing $file:");
}

{ my %sort_key;     # original string => normalized form

# Return the form of a string used for dictionary-order sorting: lower
# case, apostrophes removed, every run of characters other than a-z
# turned into a single space, no leading or trailing space, and without
# a leading "a", "an" or "the".  Non-empty results are remembered, so
# each distinct string is normalized only once.
#
# The ($) prototype makes this a named unary operator, which callers
# rely on when writing "dict X cmp dict Y" without parentheses.
sub dict ($) {
    my ($text) = @_;

    my $known = $sort_key{$text};
    return $known if $known;

    my $key = lc $text;
    $key =~ tr/'//d;
    $key =~ tr/a-z/ /cs;
    $key =~ s/^ //;
    $key =~ s/ $//;
    $key =~ s/^(a|an|the) //;

    return $sort_key{$text} = $key;
} }

# Read the database and write one page per sort order.  Returns 0.
#
# The default page simply shows the input in reverse order.  Every other
# page sorts a list of indexes and uses "$b <=> $a" as the tie-breaker,
# so records which compare equal appear with the later input line first.
sub main {
    my @book  = parse();
    my @index = 0 .. $#book;

    page('', reverse @book);

    # Start date, most recent first.
    page('-s', @book[ sort {
        $book[$b][F_START] cmp $book[$a][F_START] or $b <=> $a
    } @index ]);

    # Rating, best first.
    page('-r', @book[ sort {
        $book[$b][F_RATING] <=> $book[$a][F_RATING] or $b <=> $a
    } @index ]);

    # Type, by the one-letter code.
    page('-y', @book[ sort {
        $book[$a][F_TYPE] cmp $book[$b][F_TYPE] or $b <=> $a
    } @index ]);

    # Copyright year, newest first.
    page('-d', @book[ sort {
        $book[$b][F_COPYRIGHT] <=> $book[$a][F_COPYRIGHT] or $b <=> $a
    } @index ]);

    # Title, in dictionary order.
    page('-t', @book[ sort {
        dict($book[$a][F_TITLE]) cmp dict($book[$b][F_TITLE]) or $b <=> $a
    } @index ]);

    # The author page lists a book once for each of its authors, each
    # time with a different author moved to the front of the list.  The
    # records are copies, so the originals in @book are not changed.
    my @by_author;
    for my $entry (@book) {
        my @names = @{ $entry->[F_AUTHOR] };
        for my $i (0 .. $#names) {
            my @others = @names;
            my ($lead) = splice @others, $i, 1;

            my @row = @$entry;
            $row[F_AUTHOR] = [$lead, @others];
            push @by_author, \@row;
        }
    }

    page('-a', @by_author[ sort {
        $by_author[$a][F_AUTHOR][0] cmp $by_author[$b][F_AUTHOR][0]
            or $b <=> $a
    } 0 .. $#by_author ]);

    return 0;
}

# Run the program.  A true return value from main() takes precedence
# over a failure recorded earlier in $Exit.  A non-zero status which is
# a multiple of 256 is turned into 1, presumably because only the low 8
# bits of the status reach the parent and it would read as success.
$Exit = main() || $Exit;
if ($Exit and $Exit % 256 == 0) {
    $Exit = 1;
}
exit $Exit;

# XXX add histograms for ratings, count by year, others?
