# Perl script to convert citations from Xplore search results to bibtex
#
# Usage: xplore.pl source.txt
#
#        source.txt is a text file that has been copied and pasted from the IEEE Xplore
#        search results and contains one or more citations. If this argument is omitted
#        then a file called xplore.txt in the local directory will be used.
#
# Outputs: source.bib is a bibtex file created in the same directory as source.txt. The output
#          file has the same name as the input file but with an extension of .bib
#
#
# To enable right-clicking, you can create an association with a .TXT file called "IEEE bib" having
# an action "...\perl\bin\perl.exe" "...\xplore.pl" "%1" [where the ... are system dependent]

#%      Copyright (C) Mike Brookes 2006
#%      Version: $Id: xplore.pl,v 1.16 2006/01/25 12:36:39 dmb Exp $
#%
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#%   This program is free software; you can redistribute it and/or modify
#%   it under the terms of the GNU General Public License as published by
#%   the Free Software Foundation; either version 2 of the License, or
#%   (at your option) any later version.
#%
#%   This program is distributed in the hope that it will be useful,
#%   but WITHOUT ANY WARRANTY; without even the implied warranty of
#%   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#%   GNU General Public License for more details.
#%
#%   You can obtain a copy of the GNU General Public License from
#%   ftp://prep.ai.mit.edu/pub/gnu/COPYING-2.0 or by writing to
#%   Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

use strict;
use warnings;
use utf8;    # the quote-fixing patterns below contain literal non-ASCII characters

my $logging = 0;    # set to 1 to enable logging

# only use the first argument for now
my $infile = @ARGV ? $ARGV[0] : "xplore.txt";

# Strip the extension to form the base name of the output (and log) file.
# The extension may not contain a path separator, so a dot in a directory
# name (e.g. "./xplore") is not mistaken for the start of an extension.
my $inbase = $infile;
if ( $infile =~ /^(.*)\.([^.\\\/]*)$/s ) {
    lc($2) ne "bib" || die "Cannot convert a .bib file";
    $inbase = $1;
}
my $outfile = $inbase . ".bib";

# NOTE(review): the input is decoded as UTF-8 so that the curly-quote fixes
# below see whole characters; confirm that pasted source files are UTF-8.
open( my $in_fh,  '<:encoding(UTF-8)', $infile )  || die "Can't open $infile\a\n";
open( my $out_fh, '>:encoding(UTF-8)', $outfile ) || die "Can't open $outfile\a\n";
my $log_fh;
if ($logging) {
    my $logfile = $inbase . ".log";
    open( $log_fh, '>:encoding(UTF-8)', $logfile ) || die "Can't open $logfile\a\n";
}

my @now = localtime(time);
my $timestamp = sprintf( '%d.%02d.%02d', 1900 + $now[5], $now[4] + 1, $now[3] );

# Fields of the citation currently being assembled; written by the main loop
# and read by printit().
my ( $title,  $author, $key,   $journal, $conf );
my ( $volume, $issue,  $month, $year,    $pstart, $pend, $doi );
my %basecnt;    # next duplicate-key suffix for each "LastnameYear" base key

# Position within the current entry: 1=title, 2=authors, 3=journal,
# 4=volume/date/pages, 5=DOI, 0=entry complete (next line is treated as a gap).
my $line = 1;

while (<$in_fh>) {
    s/^\x{FEFF}//;    # drop a byte-order mark if the file starts with one
    s/\s+/ /g;        # compress any spaces
    s/^\s+//;         # trim leading whitespace
    s/\s+$//;         # and trailing whitespace
    tr/‘’/`'/;        # fix bad quote marks
    s/“/``/g;
    s/"/``/;          # fix leading double quote mark
    s/["”]/''/g;      # and trailing ones
    $logging and print {$log_fh} "$line: $_\n";

    if ( !length || !$line || /abstractplus|full text:/i ) {

        # detect inter-reference gap
        $logging
          and print {$log_fh} "check print: ",
          ( defined $title ? $title : "" ), "\n";
        if ( defined $doi || defined $pstart || defined $year ) { printit(); }
        ( $author, $journal, $volume, $issue, $month, $year, $pstart, $pend, $doi ) = ();
        $line = 1;
    }
    elsif ( $line == 1 ) {

        # title line
        s/^[0-9]+\.\s+//;    # trim leading number
        $title = $_;         # save title
        $line  = 2;
    }
    elsif ( $line == 2 ) {

        # author line
        s/([a-z])\s+([A-Z]\.)/$1, $2/g;    # insert comma between Last name and an initial
        s/([^;]*,)([^;]*),([^;]*);/$1$3,$2;/g;    # "Jr" part of name should come in the middle not at the end
        s/;$//;             # remove final ;
        s/ *; */ and /g;    # change remaining ; to and
        $author = $_;
        s/ and .*//;        # eliminate all but first author
        s/\s*,.*//;         # eliminate everything after a comma

        # find last word with >=2 alphabetic chars; only trust the capture
        # when the match succeeded, otherwise fall back to a fixed key stem
        $key = /.*(^|\s)([A-Za-z][A-Za-z]+)/ ? $2 : "anon";
        $line = 3;
    }
    elsif ( $line == 3 ) {

        # journal line
        s/\[(.*workshop.*)\]/$1/i;    # sometimes "Workshop on" is enclosed in brackets
        s/\[.*\]//g;                  # delete alternative journal name
        s/(.*),\s*(.*)/$2 $1/;        # uninvert journal names
        $conf = !/(Transactions|Journal|Magazine|Letters)/i;    # decide if it is a conference
        s/[., ]+/ /g;                            # remove punctuation
        s/\s+[1-2][0-9][0-9][0-9]\s+/ /g;        # remove 4-digit dates
        s/\s+'[0-9][0-9]\s+/ /g;                 # remove 2-digit dates
        s/\s+[VIX]+\s+/ /g;                      # remove roman numbers

        # abbreviate some common words
        s/Transactions/Trans/g;
        s/Magazine/Mag/g;
        s/Letters/Lett/g;
        s/Conference/Conf/g;
        s/International/Intl/g;
        s/Symposium/Symp/g;
        s/Workshop/Wkshp/g;
        s/Proceedings/Proc/g;
        s/Trans on/Trans/g;
        s/Proc of the/Proc/g;

        # remove ordinal numbers
        s/\s+[0-9]+(st|nd|rd|th)\s+/ /g;

        # correct some favourite conferences that the script finds tricky
        # (the || chain stops at the first substitution that applies)
        s/.*ICASSP.*/Proc IEEE Intl Conf on Acoustics Speech and Signal Processing/
          || s/.*ISCAS.*/Proc IEEE Intl Symp on Circuits and Systems/
          || s/.*Asilomar Conf on Signals.*/Proc Asilomar Conf on Signals, Systems and Computers/
          || s/.*Intl Conf on Image Processing.*/Proc Intl Conf on Image Processing/
          || s/.*Design Automation Conf.*/Proc Design Automation Conf/
          || s/.*IEEE Intl Solid-State Circuits Conf.*/Proc IEEE Intl Solid-State Circuits Conf/;
        s/^\s+//;    # trim leading whitespace
        s/\s+$//;    # trim trailing whitespace
        $journal = $_;
        $logging and print {$log_fh} "Conf check: $conf\n";
        $line = 4;
    }
    elsif ( $line == 4 ) {

        # Volume information
        if (/Volume\s+([0-9]*)/) {
            $volume = $1;
            $logging and print {$log_fh} "Vol: $1\n";
        }
        if (/Issue\s+([0-9]+)/) {
            $issue = $1;
            $logging and print {$log_fh} "Issue: $1\n";
        }
        if ( /(Jan|Feb|March|April|May|June|July|Aug|Sept|Oct|Nov|Dec)\.* ([12][0-9][0-9][0-9])/ ) {

            # Just use the first 3 letters of month since bib style will convert
            $month = lc( substr( $1, 0, 3 ) );
            $year  = $2;
            $logging and print {$log_fh} "Month: $month Year: $year\n";
        }
        elsif (/([0-9]+)\s*Page/) {

            # a bare number just before "Page" is taken as the year if plausible
            if ( ( $1 >= 1900 ) && ( $1 < 2100 ) ) { $year = $1; }
        }
        if (/Page[^:]*:([0-9]+)\s*-[\s-]*([0-9]+)/) {
            $pstart = $1;
            $pend   = $2;
            $logging and print {$log_fh} "Pages: $1 to $2\n";
        }
        $line = 5;
    }
    elsif ( $line == 5 ) {

        # DOI: only read $1 when this line actually matched
        if ( /Digital Object Identifier (.*)/ && length $1 ) {
            $doi = $1;
            $logging and print {$log_fh} "DOI: $doi\n";
        }
        $line = 0;
    }
    else {

        # ignore should not happen
        $line = 1;
    }
}    # end while

# finally print out the last entry
if ( defined $doi || defined $pstart || defined $year ) { printit(); }

close $in_fh;
close($out_fh) || die "Can't close $outfile\a\n";
$logging and close $log_fh;

#################################################################
# printit: write the citation held in the file-level entry variables to the
# output file as one BibTeX record.
#
# Takes no arguments and returns nothing useful.  Emits @INPROCEEDINGS when
# $conf is true and @ARTICLE otherwise.  The citation key is the first
# author's last name followed by the year; the first entry with a given key
# has no suffix, and later entries with the same base key get "a", "b", ...
# Optional fields (month, volume, pages, number, doi) are written only when
# they hold a true value.
sub printit {
    my $yr      = defined $year ? $year : "";
    my $stem    = defined $key  ? $key  : "anon";
    my $basekey = "$stem$yr";
    my $fullkey;
    if ( exists( $basecnt{$basekey} ) ) {

        # we have had a previous reference with this base key: add a suffix
        $fullkey = "$basekey$basecnt{$basekey}";
        $basecnt{$basekey}++;    # string increment: "a" -> "b" -> ...
    }
    else {
        $basecnt{$basekey} = "a";
        $fullkey = $basekey;     # first use of this base key gets no suffix
    }
    $logging and print {$log_fh} "print: $fullkey: $title\n";
    if ($conf) {
        print {$out_fh} "\@INPROCEEDINGS{$fullkey,\n";
        print {$out_fh} "author = {$author},\n";
        print {$out_fh} "title = {{$title}},\n";
        print {$out_fh} "booktitle = {{$journal}},\n";
    }
    else {
        print {$out_fh} "\@ARTICLE{$fullkey,\n";
        print {$out_fh} "author = {$author},\n";
        print {$out_fh} "title = {{$title}},\n";
        print {$out_fh} "journal = {$journal},\n";
    }
    print {$out_fh} "year = {$yr},\n";
    $month  && print {$out_fh} "month = $month,\n";    # unbraced: a BibTeX month macro
    $volume && print {$out_fh} "volume = {$volume},\n";
    $pstart && print {$out_fh} "pages = {$pstart-$pend},\n";
    $issue  && print {$out_fh} "number = {$issue},\n";
    $doi    && print {$out_fh} "doi = {$doi},\n";
    print {$out_fh} "timestamp = {$timestamp},\n";
    print {$out_fh} "}\n\n";
    return;
}