#!/usr/bin/perl
#
#  bids.to.bibtex is based on a perl script written by Jonathan Swinton,
#  js@ic.ac.uk, and subsequently modified by Ben Bolker and Anthony Stone,
#  ajs1@cam.ac.uk.

#  This version last modified on 29 January 1998.

#  Usage: bids.to.bibtex <file>
#
#  will take input from a BIDS email message in <file> in the source directory
#  $sourcedir and produce a BibTeX bibfile <file>.bib in the output directory
#  $bibdir.

#  If both files are to be in the current directory, replace the definitions
#  below by
#  $sourcedir = "";
#  $bibdir = "";
#  If the output file exists, the new references are appended to it. Otherwise
#  a new file is created.

#  You will need to change the first line of this file if perl lives somewhere
#  other than /usr/bin/perl on your system.

#  The BIDS message should have been generated using one of the downloading
#  formats. Other bibliography database output can be processed provided that
#  it is in the same (fairly standard) form, i.e. a sequence of fields each
#  beginning with a two-letter flag at the start of a line, followed by "- ",
#  with continuation lines starting with four spaces. The whole entry is
#  terminated by a blank line. Any lines in the file that do not conform to
#  this pattern are ignored. An example of such an entry is
#  TI- THE NATURE AND GEOMETRY OF INTERMOLECULAR INTERACTIONS BETWEEN 
#      HALOGENS AND OXYGEN OR NITROGEN
#  AU- LOMMERSE, JPM;STONE, AJ;TAYLOR, R;ALLEN, FH
#  NA- CAMBRIDGE CRYSTALLOG DATA CTR,12 UNION RD,CAMBRIDGE CB2 1EZ,ENGLAND
#      CAMBRIDGE CRYSTALLOG DATA CTR,CAMBRIDGE CB2 1EZ,ENGLAND
#      UNIV CAMBRIDGE,CHEM LAB,CAMBRIDGE CB2 1EW,ENGLAND
#  JN- JOURNAL OF THE AMERICAN CHEMICAL SOCIETY
#  PY- 1996
#  VO- 118
#  NO- 13
#  PG- 3108-3116
#  IS- 0002-7863
#
#  Minor variations in this format can be dealt with by changing the "elsif"s
#  near the end of this file.

#  Some work will be needed to deal with capitalization and accents in the
#  BibTeX file. The title is converted to lower-case, unless it already
#  contains any lower-case characters, and a few common acronyms are converted
#  back to upper case by subroutine special. Use the examples there as
#  models for your own if you like. An attempt is made to convert chemical
#  formulae, but it is usually necessary to do this by hand. Note that any
#  upper-case material in the title needs to be enclosed in braces, or BibTeX
#  will force it to lower-case again. If your citation format doesn't include
#  the title, then of course this isn't a problem.

#  You may wish also to modify the "journal" subroutine, which replaces the
#  full journal title by a conventional abbreviation. This almost certainly
#  doesn't deal with all the journals that you might wish to cite.

#  This version assumes perl 5, but it should work with perl 4.

#  Please address comments and bug reports to Anthony Stone (ajs1@cam.ac.uk),
#  who will be pleased to receive them but does not promise to do anything
#  about them.

$sourcedir = "/users/anthony/Mail/BIDS/";
$bibdir = "/users/anthony/bibliography/bibfiles/";
 
$, = ' ';  # set output field separator
$\ = "\n";  # set output record separator

format OUT =
^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$line
              ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
$line
.

%name = (
    'TI', 'title',
    'AU', 'author',
    'NA', 'address',
    'JN', 'journal',
    'PY', 'year',
    'VO', 'volume',
    'NO', 'issue',
    'PG', 'pages',
    'AB', 'abstract',
    'KP', 'keywords',
#    'NA', 'ignore',
#    'CR', 'ignore',
#    'RF', 'ignore',
#    'KP', 'ignore',
#    'PA', 'ignore'
     );

@entry{'header','author','title','journal','volume','year','issue','pages',
    'keywords','abstract','none'} = ('','','','','','','','','','','');
$type = 'none';
 
sub initcaps
#  Capitalise initial letter of every word, lower-case the rest
{
  local ($arg) = $_[0];
  $arg =~ s/([A-Za-z])([A-Za-z]+)/\U$1\E\L$2\E/g;
  $arg;
}
 
sub firstupper
#  Capitalise initial letter of every sentence, lower-case the rest
{
  local ($arg) = $_[0];
  $arg =~ s/(.)(.*)/\U$1\E\L$2\E/;
  $arg =~ s/([.?!]\s+)([a-z])/$1\u$2/g;
  $arg;
}

sub lowercase
{
  local ($arg) = $_[0];
  $arg =~ s/(.*)/\L$1\E/;
  $arg;
}

sub lowertrivial
{
  local ($arg) = $_[0];
  $arg =~ s/\ba\b/a/gi;
  $arg =~ s/\ban\b/an/gi;
  $arg =~ s/\band\b/and/gi;
  $arg =~ s/\bthe\b/the/gi;
  $arg =~ s/\bof\b/of/gi;
  $arg =~ s/\bto\b/to/gi;
  $arg =~ s/\bfrom\b/from/gi;
  $arg =~ s/\bin\b/in/gi;
  $arg =~ s/\bwith\b/with/gi;
  $arg;
}

sub special
#  Add your own acronyms. Note that the upper-case replacement is enclosed
#  in braces so that BibTeX doesn't put it back into lower-case.
{
  local ($arg) = $_[0];
  $arg =~ s/\bscf\b/{SCF}/g;
  $arg =~ s/\bmc(-*)scf\b/{MC$1SCF}/g;
  $arg =~ s/\bci\b/{CI}/g;
  $arg =~ s/\b([0-9]-[0-9]+)g\b/{$1G}/g;
  $arg =~ s/\bdna\b/{DNA}/g;
  $arg =~ s/vanderwaals/{Van} der {Waals}/g;
  $arg =~ s/moller-plesset/{M}{\\o}ller--{Plesset}/g;
  $arg;
}

sub formulae
#  Chemical formulae -- impossible to get them all right
{
  local ($arg) = $_[0];
  $arg =~ s/(\b[a-z]+[1-9]+[a-z0-9]+\b)/{\U$1\E}/gi;
  $arg;
}

sub strip
{
  local ($val) = $_[0];
 
  $val =~ s/....//; # chop off initial flag and surrounding spaces
  $val =~ s/ *$//; # some entries have trailing spaces
  $val;
}

sub header
{
  $line = "\@article{" . $flag . ",";
  write(OUT);
}
 
sub authors
{
  #  Transform author entry into BibTeX format and construct flag.
  #  The flag is made up from the first author's surname, followed by the
  #  first letter of the surname of each remaining author, followed by the
  #  last two digits of the year. This is usually unique, but not always.
  @arr = split(/;/,$entry{'author'});
  $flag = "";
  foreach $auth ( @arr)
  {
    ($sname,$fname) = split(/, /, $auth);
    $fname =~ s/\w/$&. /g; $fname =~ s/ *$//;
    $sname = &initcaps($sname); $sname =~ s/^(Mac|Mc|O')([a-z])/$1\u$2/;
    # print $fname . " " . $sname . "\n";
    if ($flag eq "") {$flag=$sname; $flag =~ s/ //g;} else {$flag .= substr($sname,0,1);}
    $auth = $sname . ', ' . $fname ;
  }
  $entry{'author'} = join (" and ", @arr);
  $flag .= substr($entry{'year'},-2);   # append last two digits of year to flag
  $flag =~ s/ //g;
}

sub author
{
  $line = '  Author    = {' . $entry{'author'} . '},';
  $line = &texsafety ($line);
  write(OUT);
}
 

sub address
{
  $entry{'address'} =~ s/,([^ ])/, $1/g;
  $entry{'address'} = &lowertrivial(&initcaps($entry{'address'}));
  $line = '  Address   = {' . $entry{'address'} . '},';
  $line = &texsafety ($line);
  write(OUT);
}

sub title
{
  # If the title contains any lowercase characters already, leave it
  # unchanged. Otherwise convert to lowercase except for first letter of
  # each sentence, and attempt to uppercase chemical formulae and common
  # acronyms.
  if ( $entry{'title'}=~/[a-z]/ ) {
    $line = $entry{'title'};}
  else {
    $line = &formulae(&special(&firstupper($entry{'title'})));}
    $line = &texsafety($line);
    $line = '  Title     = {' . $line  . '},';
    # print $line;
  write(OUT);
}

sub journal
{
  $journal = &initcaps($entry{'journal'});
  $journal =~ s/Journal/J./;
  $journal =~ s/Chemical|Chemistry/Chem./;
  $journal =~ s/Physics|Physical/Phys./;
  $journal =~ s/Society/Soc./;
  $journal =~ s/Communications/Comm./;
  $journal =~ s/Transactions/Trans./;
  $journal =~ s/Reviews/Rev./;
  $journal =~ s/-Chemical/ Chem./;
  $journal =~ s/-Faraday/ Faraday/;
  $journal =~ s/Discussions/Disc./;
  $journal =~ s/American/Amer./;

  $journal =~ s/(\s)a\s/$1/gi;
  $journal =~ s/(\s)an\s/$1/gi;
  $journal =~ s/(\s)and\s/$1/gi;
  $journal =~ s/(\s)the\s/$1/gi;
  $journal =~ s/(\s)of\s/$1/gi;
  $journal =~ s/(\s)to\s/$1/gi;
  $journal =~ s/(\s)from\s/$1/gi;
  $journal =~ s/(\s)in\s/$1/gi;
  $journal =~ s/(\s)with\s/$1/gi;
  $line = '  Journal   = {' . $journal . '},';
  write(OUT);
}

sub year
{
  $line = '  Year      = {' . $entry{'year'} . '},';
  write(OUT);
}

sub volume
{
  $line = '  Volume    = {' . $entry{'volume'} . '},';
  write(OUT);
}

sub pages
{
  $line = '  Pages     = {' . $entry{'pages'} . '}';
  if ( $entry{'abstract'} ne "" || $entry{'keywords'} ne "" ) {$line .= ',';}
  $line = &texsafety ($line);
  write(OUT);
}

sub keywords
{
  if ( $entry{'keywords'} ne "" ) {
    $line = '  Keywords  = {' . &lowercase($entry{'keywords'}) . '}';
    if ( $entry{'abstract'} ne "" ) {$line .= ',';}
    write(OUT);}
}

sub abstract
{
  if ( $entry{'abstract'} ne "" ) {
    $line = '  Abstract  = {' . $entry{'abstract'} . '}';
    $line = &texsafety ($line);
    write(OUT);}
}

sub issue
{
}

sub terminator
{
  $line = "}";
  write(OUT);
  print OUT "\n";
}

sub clear 
{
 $entry{'author'} = '';
 $entry{'title'} = '';
 $entry{'journal'} = '';
 $entry{'volume'} = '';
 $entry{'year'} = '';
 $entry{'page'} = '';
 $entry{'issue'} = '';
 $entry{'abstract'} = '';
}
 
 
sub texsafety
{
 # Replace % and & by \% and \&.
 $arg = $_[0];
 $arg =~  s/\&/\\&/g;
 $arg =~  s/\%/\\%/g;
 $arg;
}
 
while ($file = shift) {
  open(IN,$sourcedir.$file)
                 || die "Can't open ".$sourcedir.$file."\n";
  if ( -e $bibdir.$file.".bib" ) {
    warn $file.".bib exists -- appending\n"; 
    open(OUT,">>" . $bibdir.$file . ".bib");}
  else {
    open(OUT,">" . $bibdir.$file . ".bib");}
  while (<IN>) {
    # print;
    chop; # strip record separator
   
    if (/^ *$/ && $type ne 'none' && $type ne 'header' ) {
      # Entry is complete. Process and print it.
      & authors;
      & header;
      & author;
      & address;
      & title;
      & journal;
      & year;
      & volume;
      & issue;
      & pages;
      & keywords;
      & abstract;
      & terminator;
      $type = 'none';
      & clear; }

    else {
      if (/^record/i) {
        $type = 'header';
        $entry{$type} = $_;
      }
      elsif (/^\(([0-9]+)\)/) {
	  die "Input appears to be in the wrong format. You need to use 'downloading' format.\n";
      }
    
      elsif (/^    / && $type ne 'none' && $type ne 'ignore') {
        $entry{$type} .= " " . & strip($_);
      }
      elsif (/^([A-Z][A-Z])- /i) {$tag=$1;
        if ( defined($name{$tag}) ) {
          $type = $name{$tag};
          $entry{$type} = & strip ($_) ;
	}
        else {
          $type = "ignore";
          $entry{$type} = "";
        }
      }
        
      # print "$type : $entry{$type}\n";
    }
  }
close IN;
close OUT;
}