#!/usr/bin/env perl #################################################################### # # File: bibweb # Author: John H. Palmieri # URL: http://www.math.washington.edu/~palmieri/Bibweb/ # Version: 0.62 of Mon Sep 24 11:14:31 PDT 2012 # Description: retrieve bibliographical information from MathSciNet # automatically # Copyright (c) 1997-2012 John H. Palmieri # License: distributed under GNU General Public License -- see below. # #################################################################### # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # (see file COPYING) along with this program; if not, write to the # Free Software Foundation, Inc., 59 Temple Place - Suite 330, # Boston, MA 02111-1307, USA. # #################################################################### # # Command line options: # FILE use FILE.aux as input, FILE.bib as output # -i FILE specify FILE as input (aux) file. # -o FILE specify FILE as output (bib) file. 
If FILE ends in # ".bib", write to FILE; otherwise, write to FILE.bib # -c REF looks up REF, rather than using an auxfile for input # -m NUM return at most NUM entries (where NUM is rounded up # to 5, 10, 20, 50, 100, 1000) # -e WEB_SITE use WEB_SITE for MathSciNet search # -std write output to STDOUT (the screen, ordinarily) # -sep CHAR use CHAR to delimit field in long citation format, # instead of semicolon (;) # -lax use % to comment lines in bibtex # -h print brief help message # # If you use only one of the -i and -o options, bibweb makes a guess # as to what the other file should be. #################################################################### use LWP::UserAgent; use HTTP::Request::Common; $bibtex = 'bibtex'; $thisprog = 'bibweb'; $version = '0.62'; # good choices for e_math: 'www.ams.org', 'ams.rice.edu', # 'ams.mathematik.uni-bielefeld.de', 'ams.mpim-bonn.mpg.de', # 'ams.u-strasbg.fr', 'ams.impa.br' $e_math = $ENV{MATHSCINET_SITE}; unless ($e_math) {$e_math = 'www.ams.org'} $use_stdout = 0; $debug = 0; $lax_comments = 0; $max_matches = 20; $usage = < \$auxfile, "output|o=s" => \$bibfile, "stdout|std|s" => \$use_stdout, "cite|c=s" => \$only_cite, "max|m=i" => \$max_matches, "emath|e=s" => \$e_math, "separator|sep=s" => \$new_separator, "lax" => \$lax_comments, "debug|D" => \$debug, "help|h" => \$help) or die "$usage\n"; if ($help) { die "$usage\n" }; if ($e_math eq 'ams') {$e_math = 'www.ams.org'} if ($e_math eq 'rice') {$e_math = 'ams.rice.edu'} if ($e_math eq 'bielefeld') {$e_math = 'ams.mathematik.uni-bielefeld.de'} if ($e_math eq 'bonn') {$e_math = 'ams.mpim-bonn.mpg.de'} if ($e_math eq 'strasbg') {$e_math = 'ams.u-strasbg.fr'} if ($e_math eq 'impa') {$e_math = 'ams.impa.br'} if ($e_math =~ / ^www.ams.org$| ^ams.rice.edu$| ^ams.mathematik.uni-bielefeld.de$| ^ams.mpim-bonn.mpg.de$| ^ams.u-strasbg.fr$| ^ams.impa.br$ /x) { print "Using \`$e_math\' for the MathSciNet search.\n" } else { print "Warning: Your choice of \`$e_math\' for the MathSciNet 
site is not one of the recommended choices. Proceeding anyway...\n\n"; } if ($e_math eq 'www.ams.org') {$redirect = 'Providence%2C+RI+USA'} if ($e_math eq 'ams.rice.edu') {$redirect = 'Houston%2C+TX+USA'} if ($e_math eq 'ams.mathematik.uni-bielefeld.de') { $redirect = 'Bielefeld%2C+Germany'} if ($e_math eq 'ams.mpim-bonn.mpg.de') {$redirect = 'Bonn%2C+Germany'} if ($e_math eq 'ams.u-strasbg.fr') {$redirect = 'Strasbourg%2C+France'} if ($e_math eq 'ams.impa.br') {$redirect = 'Rio+de+Janeiro%2C+Brazil'} unless ($auxfile and $bibfile) { $file = shift(@ARGV); $auxfile = ($auxfile or $file); $bibfile = ($bibfile or "$file.bib"); } if ($bibfile and $bibfile !~ m/\.bib$/) { $bibfile = "$bibfile.bib"; } if ($only_cite) { $only_cite =~ s(\'\")(); @bibtex_output = (1); $use_stdout = 1 if ($bibfile eq "" or $bibfile eq ".bib"); } else { $auxfile =~ s/\.aux$//; if ($auxfile and not $bibfile) { $bibfile = "$auxfile.bib" } unless (-e "$auxfile.aux") {die "Couldn't read $auxfile.aux\n"}; unless ($use_stdout) { open(BIBFILE, ">>$bibfile") or die "Couldn't open $bibfile\n"; print "Appending output to $bibfile . . . \n\n"; } @bibtex_output = `$bibtex $auxfile`; } if ($max_matches > 50) { $max_matches = 50; print "Rounding -m argument down to $max_matches.\n\n"; } $semicolon = ";"; if ($new_separator) { $semicolon = quotemeta ($new_separator) } if ($lax_comments) { $bibtex_short_comment = '%%'; $bsc = $bibtex_short_comment; $bibtex_long_comment = '%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%'; } else { $bibtex_short_comment = '@comment '; $bsc = $bibtex_short_comment; $bibtex_long_comment = $bsc; } foreach $warning (@bibtex_output) { # get citation if ($only_cite) { $citation_orig = $citation = $only_cite; $citation =~ tr/,/./; } elsif ($warning =~ (/^Warning--I didn\'t find a database entry for \"([^\"]*)\"$/)) { $citation_orig = $citation = $1; next if &check_bibfile($citation_orig); } else { next; } # split citation into author, etc. 
$citation =~ s/-and-/-/g; $citation =~ tr/./,/; $author = ''; $title = ''; $journal = ''; $year = ''; $dr = 'all'; $misc = ''; if ($citation =~ m/$semicolon/) { ($authors, $titles, $journals, $year) = split(/$semicolon/, $citation); $author = join('+and+', split(/-/, $authors)); $title = join('+and+', split(/-/, $titles)); $journal = join('+and+', split(/-/, $journals)); } else { ($author, $subcitation) = split(/-/, $citation, 2); if ($subcitation =~ /([<>=]?\d+)\Z/) { $subcitation = $`; $year = $1; } $misc = join('+and+', split(/-/, $subcitation)); } # parse year entry YEAR: { $dr = 'yearRangeFirst=&yearRangeSecond=&pg8=ET&s8=All'; last YEAR; $yr = ''; } # construct URL $url = "http://$e_math/mathscinet/search/publications.html?" . "pg4=AUCN&s4=" . "$author" . "&co4=AND&pg5=TI&s5=" . "$title" . "&co5=AND&pg6=JOUR&s6=" . "$journal" . "&co6=AND&pg7=ALLF&s7=" . "$misc" . "&Submit=Search&dr=all&yrop=eq&arg3=&" . "$yr" . "&fmt=bibtex" ; unless ($use_stdout) { &bib_print("", "$bibtex_long_comment \n"); } &bib_print("working on citation \'$citation_orig\' \n", "$bsc citation \'$citation_orig\' \n" ); $match_info_printed = ''; if ($debug) {print "debug information: URL is \n$url\n"}; # get response from MathSciNet my $ua = LWP::UserAgent->new; my $response = $ua->request(GET $url); if ($response->is_success) { $line = $response->as_string; #if ($debug) {print "debug information: line is \n$line\n"}; if ($line =~ /No Records Selected/) { &bib_print("No matches found; please revise your search criteria.\n\n", "$bsc No matches found. \n$bsc\n"); } else { @lines = ($line =~ m/\s*
\n([^<]*)\n\s*<\/pre>/g);
	    
	    #if ($debug) {print "debug information: now, lines are \n@lines \n"};

	    #if ($debug) {print "number of matches: " . scalar(@lines) . "\n\n"};
	    
	    $matches = scalar(@lines);

	    if ($matches == 1) {
		# get mr number, insert comment, and replace with citation
		$line = $1;
		
		# if ($debug) {print "NOW line is **$line**"};
		
		$line =~ /MRNUMBER = {([^}]*)}/;
		$mr = $1;
		&bib_print("", "$bsc Math Reviews number: $mr \n");
		$line =~ s/(MR[^,]*)/$citation_orig/;
		&bib_print ("", "$line\n");
	    }
	    elsif ($matches <= $max_matches) {
		foreach $line2 (@lines) {
		    if ($line2 =~ /[a-zA-Z]/) {
			# get mr number and insert comment
			$line2 =~ /MRNUMBER = {([^}]*)}/;
			$mr = $1;
			&bib_print("", "$bsc Math Reviews number: $mr \n");
			&bib_print ("", "$line2\n");
		    }
		}
	    }
	    elsif ($matches < 50) {
		# too many matches found
		&bib_print("More than $max_matches matches found " .
			   "($matches); " .
			   "please refine your search criteria,\n" .
			   "or use the -m option. \n\n",
			   "$bsc More than $max_matches matches found " .
			   "($matches). \n$bsc\n");
	    }
	    else {
		# way too many matches found
		&bib_print("At least $matches matches found; " .
			   "please refine your search criteria.\n\n",
			   "$bsc At least $matches matches found. \n$bsc\n");
	    }

	    # can max_matches be larger than 50??
	}
    }
}

# Finish with the bib file.  BIBFILE is only open when output went to a
# file (and may already have been closed by check_bibfile), so test it
# first; errors from buffered writes only show up at close time.
if (defined fileno(BIBFILE)) {
    close(BIBFILE) or warn "Couldn't close $bibfile: $!\n";
}

# run bibtex again, to make use of new keys
# (skipped for -c lookups, which have no aux file).  exec only returns
# when the command could not be started, so report that instead of
# silently falling off the end of the script.
unless ($only_cite) {
    exec("$bibtex $auxfile")
        or die "Couldn't run $bibtex $auxfile: $!\n";
}

########## simple subroutines

# Scan the bib file for a citation, to see if you've looked for it before.
#
# Arguments:  $_[0] -- the citation key to look for.
# Globals:    $bibfile -- path of the bib file;
#             $bsc     -- short comment prefix that precedes each
#                         "citation '...'" marker line in the bib file.
# Returns:    1 if a marker line for this citation is already present in
#             $bibfile (a notice is printed to the screen), 0 otherwise,
#             including when $bibfile does not exist or cannot be read.
sub check_bibfile {
    my $wanted = $_[0];

    # The key is matched literally: characters such as '.' or '+' in a
    # citation must not act as regex metacharacters.
    my $cite = quotemeta($wanted);

    return 0 unless -e $bibfile;

    # Close the shared append handle first, so that any output still
    # buffered in it reaches the file before it is read back.
    close(BIBFILE);

    my $in;
    unless (open($in, '<', $bibfile)) {
        warn "Couldn't read $bibfile: $!\n";
        return 0;
    }

    my $found = 0;
    while (my $entry = <$in>) {
        if ($entry =~ /\Q$bsc\E\s*citation\s*\'$cite\'/) {
            $found = 1;
            last;
        }
    }
    close($in);

    if ($found) { print "You've searched for $wanted before.\n\n" }
    return $found;
}

# Print to the screen and to the bib file, unless writing to STDOUT.
#
# Arguments:  $_[0] -- text for the screen (may be empty);
#             $_[1] -- text for the bib file.
# Globals:    $use_stdout -- when true, only $_[1] is printed, to STDOUT,
#                            and no file is touched;
#             $bibfile    -- file that $_[1] is appended to otherwise.
# Dies if $bibfile cannot be opened for appending or the write fails.
sub bib_print {
    my ($screen_text, $bib_text) = @_;

    if ($use_stdout) {
        print $bib_text;
        return;
    }

    # Three-argument open keeps the file name from being read as part of
    # the mode.  The file is opened before anything is printed so that a
    # failure is reported before the screen message appears.
    open(my $out, '>>', $bibfile) or die "Couldn't open $bibfile: $!\n";
    print $screen_text;
    print {$out} $bib_text;

    # Closing on every call flushes the text to disk right away (so a
    # later scan of the file sees it) and surfaces write errors.
    close($out) or die "Couldn't write to $bibfile: $!\n";
    return;
}