#Blah: Bibtex to LaTeX and HTML
#by Emanuele Viola building on bibtex parser by Gerhard Gossen
#v1.0 Friday, December 23, 2011  09:57

#v1.0 documentation
#Reads one (augmented) .bib file, outputs
#blah.html and blah.tex
#
#blah.tex is a sequence of \item, and within each \\
#blah.html is HTML separated using <br>
#
#Features:
#Handling authors:
#1) Does not write authors if only author is YOU
#2) Writes list of co-authors starting with "With" if
#YOU are among the authors
#3) Writes full list of authors if YOU are not among them
#
#Handling preliminary versions:
#@ARTICLE entries support extra fields to indicate
#preliminary version (e.g. conferences)
#
#Handling of special notes for html and/or LaTeX
#(e.g. best paper award)
#
#Handling of various HTML links associated to work
#blahdoc etc.
#
#Supports bibtex entries:
#@ARTICLE
#blahprelimbooktitle
#blahprelimyear
#blahprelimpages (currently not printed)
#blahnotel (note for LaTeX)
#blahnoteh (note for HTML)
#blahdoc (paper/write-up/etc file)
#blahslides blahvideo blahcode
#journal, year, number, volume, pages
#(Note on journal: if year is not present, writes "To appear in ...")
#@INPROCEEDINGS
#booktitle
#pages (currently not printed)
#year
#blahdoc blahslides blahvideo blahcode
#@UNPUBLISHED
#@BLAHCLOSEH{}
#@BLAHEND{}
#@BLAHPRINT{
#blahprintl={LaTeX code e.g. \begin{enumerate} \end{enumerate}},
#blahprinth={HTML code e.g. <h1></h1>},
#}
#
#Blah.pl is written in Strawberry Perl on WinXP
#Blah.pl uses Perl bibtex parser by Gerhard Gossen:
#https://metacpan.org/module/BibTeX::Parser
#(to install you type cpan BibTeX::Parser)
#
#to run: perl parseBib.pl 2>blaherr.txt
#
#the option "2>blaherr.txt" redirects the error output
#to blaherr.txt.
#There are many warnings from Perl parser I do not understand.
#
#Note: Strings such as @string{jacm={Journal of the ACM}}
#are expanded by the parser.
#This happens even for fields in .bib entry I create.

#OUTL is LaTeX output file handle
#OUTH is HTML output file handle
#Both stay bareword handles because the print_* subs write to them by name.
#Three-argument open is used and the system error ($!) is reported on failure.
open(OUTL, ">", "blah.tex") || die "Can't open OUTL (blah.tex): $!";
open(OUTH, ">", "blah.html") || die "Can't open OUTH (blah.html): $!";

use BibTeX::Parser;
use IO::File;

#Input .bib file: first command-line argument if one is given,
#otherwise the original hard-coded location (so running the script
#without arguments behaves as before).
my $bib_path = @ARGV ? $ARGV[0] : "C:\\home\\krv\\math\\OmniBib.bib";

#Opened explicitly read-only; stops right away if the file cannot be opened
#instead of handing an undefined handle to the parser.
my $fh = IO::File->new($bib_path, "r") || die "Can't open $bib_path: $!";

# Create parser object ...
my $parser = BibTeX::Parser->new($fh);

#Expects an author field (names separated by " and ").
#Returns the string untouched if I am not present
# (useful for other works by research group)
#Returns empty if I am the only author.
#Otherwise returns string of co-authors,
# started with "With",
# not terminated
#
#Names are compared as whole names, so an author whose name merely
#contains mine (e.g. "Emanuele Violante") is not mistaken for me,
#and stray whitespace around a name does not matter.
sub author_to_with{
 my $authors = $_[0];
 my $me = "Emanuele Viola";

 #Nothing to do for a missing field
 return $authors unless defined $authors;

 #Splits the list into individual, trimmed names
 my @names;
 foreach my $name (split(/\s+and\s+/, $authors)) {
  $name =~ s/^\s+//;
  $name =~ s/\s+$//;
  push(@names, $name);
 }

 #I am not among the authors: leave the field as it is
 return $authors unless grep { $_ eq $me } @names;

 #Removes my name. Note I could be first, last or in the middle
 my @others = grep { $_ ne $me } @names;
 return "" unless @others;

 return "With ".join(" and ", @others);
}

#Next two functions are useful in adding new lines
#in LaTeX and HTML. It's done in two different ways.
#In LaTeX at beginning, in HTML at the end.

#Newline Latex
#"" -> ""
#Otherwise returns the string with "\\" (LaTeX line break) added at beginning.
#Works on a copy of the argument: the caller's variable is never
#modified, and literals / read-only values can be passed safely.
#A missing (undefined) value is returned as is.
sub nll{
 my $text = $_[0];
 if (defined($text) && $text ne "") {
  $text = "\\\\".$text;
 }
 return $text;
}

#Newline Html
#"" -> ""
#Otherwise returns the string with "<br>" and a newline added at end.
#Works on a copy of the argument: the caller's variable is never
#modified, and literals / read-only values can be passed safely.
#A missing (undefined) value is returned as is.
sub nlh{
 my $text = $_[0];
 if (defined($text) && $text ne "") {
  $text = $text."<br>\n";
 }
 return $text;
}

#Latex to Html.
#Takes a LaTeX-flavoured string and returns a plain-text version
#suitable for the HTML output. The argument itself is not modified.
#A missing (undefined) argument gives "".
sub l2h{
 my $h = defined($_[0]) ? $_[0] : "";

 #Kills {,},$, except if they are preceded by \
 #For example \{0,1\} should stay as is.
 #A lookbehind is used so that the character before the brace is not
 #consumed: this also removes a brace at the very start of the string
 #and every brace in a run such as {{NP}}.
 $h =~ s/(?<!\\)[\{\}\$]//g;

 #Various accents
 $h =~ s/\\'//g;
 $h =~ s/\\"//g;
 $h =~ s/\\a//g;
 $h =~ s/\\v //g;
 $h =~ s/\\c //g;

 #Non-breaking space becomes an ordinary space
 $h =~ s/~/ /g;

 #Finally kills all \. This way \$ -> $, \{ -> { etc.
 $h =~ s/\\//g;

 return $h;
}

#Writes the title of the current entry ($entry) to both outputs:
#as a new \item in OUTL, and in bold followed by a line break in OUTH.
sub print_title {
 my $title = $entry->field("title");

 #HTML form: LaTeX markup stripped, <br> appended
 my $html_title = nlh(l2h($title));

 #Note no nll for latex: the title opens the \item
 print OUTL "\\item ".$title;
 print OUTH "<b>".$html_title."</b>";
}

#Processes and prints author of the current entry ($entry) to OUTL and OUTH.
#The author list is first reduced by author_to_with; if that gives an
#empty string nothing is written (nll and nlh map "" to "").
sub print_author {
 #Lexical, so that no package global is left behind
 my $author = $entry->field("author");

 print OUTL nll(author_to_with($author));
 print OUTH nlh(l2h(author_to_with($author)));
}

#Prints the journal line of the current entry ($entry) to OUTL and OUTH.
#If year is missing: "To appear in <journal>".
#Otherwise: "<journal>, vol. V, num. N, pp. P, <year>", where each of
#volume, number and pages is included only if present.
#The LaTeX output gets the journal name exactly as in the .bib file;
#only the HTML output gets the l2h-converted name.
sub print_journal {
 my $journal = $entry->field("journal");
 my $year = $entry->field("year");

 #If year is missing then to appear, otherwise full entry
 if (!defined($year) || $year eq "") {
  print OUTL nll("To appear in ".$journal);
  print OUTH nlh("To appear in ".l2h($journal));
  return;
 }

 #Constructs the part after the journal name.
 #Some journals may miss volume, number, etc.
 #Name of journal and year is always needed here.
 my $details = "";
 foreach my $part (["volume", "vol. "], ["number", "num. "], ["pages", "pp. "]) {
  my ($field, $label) = @$part;
  my $value = $entry->field($field);
  next if !defined($value) || $value eq "";
  $details = $details.$label.$value.", ";
 }
 $details = $details.$year;

 print OUTL nll($journal.", ".$details);
 print OUTH nlh(l2h($journal).", ".$details);
}

#Prints the "Preliminary version in <venue>, <year>" line of the current
#entry ($entry) to OUTL and OUTH. Prints nothing when the entry has no
#blahprelimbooktitle.
sub print_blahprelim {
 my $venue = $entry->field("blahprelimbooktitle");

 #No preliminary venue: nothing to print
 return if $venue eq "";

 my $year = $entry->field("blahprelimyear");

 print OUTL nll("Preliminary version in ".$venue.", ".$year);
 print OUTH nlh("Preliminary version in ".l2h($venue).", ".$year);
}

#Prints the "In <booktitle>, <year>" line of the current entry ($entry)
#to OUTL and OUTH. The HTML version has the booktitle passed through l2h.
sub print_proceedings {
 my $venue = $entry->field("booktitle");
 my $year = $entry->field("year");

 print OUTL nll(join("", "In ", $venue, ", ", $year));
 print OUTH nlh(join("", "In ", l2h($venue), ", ", $year));
}

#Prints the "Manuscript, <year>" line of the current entry ($entry)
#to OUTL and OUTH.
sub print_unpublished {
 my $year = $entry->field("year");

 print OUTL nll("Manuscript, ".$year);
 print OUTH nlh("Manuscript, ".$year);
}


#Prints the special notes of the current entry ($entry):
#blahnotel goes to the LaTeX output, blahnoteh to the HTML output.
#A note that is missing or empty prints nothing.
sub print_blahnote {
 my $latex_note = $entry->field("blahnotel");
 my $html_note = $entry->field("blahnoteh");

 #Latex
 print OUTL nll($latex_note) unless $latex_note eq "";

 #html
 print OUTH nlh($html_note) unless $html_note eq "";
}

#Prints the HTML links of the current entry ($entry) to OUTH.
#Outputs nothing if no link
#Outputs links terminated by <br> if some links
#Links appear in the order Document, Slides, Video, Code; each one is
#printed only if its field is present and not empty.
#Nothing is written to the LaTeX output.
sub print_h_links {
 #Field name -> text shown for the link, in output order
 my @link_kinds = (
  ["blahdoc", "Document"],
  ["blahslides", "Slides"],
  ["blahvideo", "Video"],
  ["blahcode", "Code"],
 );

 #Collected in a lexical, so that no package globals are left behind
 my $links = "";
 foreach my $kind (@link_kinds) {
  my ($field, $label) = @$kind;
  my $url = $entry->field($field);
  next if !defined($url) || $url eq "";
  $links = $links.'<a href="'.$url.'">'.$label.'</a>&nbsp;&nbsp;';
 }

 return if $links eq "";

 print OUTH $links."<br>\n";
}


# Main iteration over entries
#
#For each entry read from $parser:
# - entries that failed to parse are reported with warn and skipped
# - @BLAHEND stops the processing
# - @BLAHCLOSEH closes the HTML output; later entries only reach LaTeX
# - @BLAHPRINT copies blahprintl / blahprinth verbatim to OUTL / OUTH
# - @ARTICLE, @INPROCEEDINGS, @UNPUBLISHED are printed by the print_* subs
#After every entry that is not skipped a newline goes to OUTL and a
#<br> to OUTH.
#
#$entry is deliberately a global: the print_* subs read it.

#Which print_* subs to run, in order, for each supported entry type
my %printers_for = (
 "ARTICLE" => [\&print_title, \&print_author, \&print_journal,
  \&print_blahprelim, \&print_blahnote, \&print_h_links],
 "INPROCEEDINGS" => [\&print_title, \&print_author, \&print_proceedings,
  \&print_blahnote, \&print_h_links],
 "UNPUBLISHED" => [\&print_title, \&print_author, \&print_unpublished,
  \&print_blahnote, \&print_h_links],
);

#Tracks whether OUTH is still open. Closing a handle twice fails, so
#without this the final close (or a second @BLAHCLOSEH) would die.
my $outh_open = 1;

while ($entry = $parser->next ) {
 if (! $entry->parse_ok){
  warn "Error parsing file: ".$entry->error;
  next;
 }

 my $type = $entry->type;

 if ($type eq "BLAHEND") {
  last;
 }

 if ($type eq "BLAHCLOSEH") {
  if ($outh_open) {
   close(OUTH) || die "Can't close OUTH";
   $outh_open = 0;
  }
  next;
 }

 if ($type eq "BLAHPRINT") {
  print OUTL $entry->field("blahprintl");
  print OUTH $entry->field("blahprinth") if $outh_open;
  next;
 }

 #Once OUTH is closed the print_* subs still try to write to it;
 #those writes produce no output.
 if (exists $printers_for{$type}) {
  foreach my $printer (@{$printers_for{$type}}) {
   $printer->();
  }
 }

 #Adds newline to OUTL for readability
 print OUTL "\n";

 #Newline in OUTH
 print OUTH "<br>\n" if $outh_open;
}

print "Finished";
close(OUTL) || die "Can't close OUTL";
if ($outh_open) {
 close(OUTH) || die "Can't close OUTH";
}



