#!/home/eric/bin/perl

# marc2oai - Convert MARC in communications format to simple OAI-XML files
#
# Usage: marc2oai filename prefix
#
# Reads MARC records from <filename>, converts each one to MARCXML, runs it
# through the STYLE stylesheet, and writes the result to
# DATA . "<prefix>-<n>.xml", where <n> counts the records converted so far
# (starting at 1). Every result is also echoed to STDOUT. Records whose
# MARCXML cannot be parsed are reported on STDERR and skipped; they do not
# consume an index number.

# Eric Lease Morgan

# 2007-07-10 - added prefix
# 2007-06-17 - first cut

# require
use strict;
use warnings;
use MARC::Batch;
use MARC::File::XML ( BinaryEncoding => 'utf8', RecordFormat => 'USMARC' );
use XML::LibXML;
use XML::LibXSLT;

# define
use constant STYLE => './marc2oai.xsl';
use constant DATA  => '/home/eric/apache/htdocs/oai/data/books/';

# get the input; test definedness/length rather than truth so that a file
# name or prefix of "0" is accepted
my ( $filename, $prefix ) = @ARGV;
if (   ! defined $filename
    or ! length $filename
    or ! defined $prefix
    or ! length $prefix )
{
    print STDERR "Usage: $0 filename prefix\n";
    exit 1;
}

# STDOUT receives character strings, so encode them on the way out. No
# binmode is applied to the input: $filename is only a name, and
# MARC::Batch opens the file itself.
binmode( STDOUT, ':encoding(UTF-8)' );

# initialize XSLT; the "or die" guards only matter if a call returns false
# instead of throwing its own exception
my $parser = XML::LibXML->new;
my $xslt   = XML::LibXSLT->new;
my $style  = $parser->parse_file( STYLE )
    or die "Can't parse stylesheet file " . STYLE . "\n";
my $stylesheet = $xslt->parse_stylesheet( $style )
    or die "Can't compile stylesheet " . STYLE . "\n";

# loop through the marc records
my $seen  = 0;    # records read from the batch, used only in diagnostics
my $index = 0;    # records successfully converted, used in file names
my $batch = MARC::Batch->new( 'USMARC', $filename );
while ( my $record = $batch->next ) {

    $seen++;

    # sanity check and parse in one step: a record whose MARCXML is not
    # well-formed is skipped instead of aborting the whole run
    my $source
        = eval { $parser->parse_string( MARC::File::XML::record( $record ) ) };
    if ( ! $source ) {
        my $error = $@ || "parser returned no document\n";
        warn "Skipping record $seen of $filename: $error";
        next;
    }

    # transform; a failure here is fatal, exactly as before
    my $results = $stylesheet->transform( $source )
        or die "Can't transform record $seen of $filename\n";

    # increment the index and save
    $index++;
    my $path = DATA . "$prefix-$index.xml";

    # three-argument open keeps $prefix from being read as part of the mode;
    # :raw because the file receives bytes already encoded by the stylesheet
    open my $output, '>:raw', $path or die "Can't open output ($!)\n";

    # characters for the terminal, bytes in the stylesheet's declared output
    # encoding for the file
    print $stylesheet->output_as_chars( $results ) . "\n";
    print {$output} $stylesheet->output_as_bytes( $results )
        or die "Can't write to $path ($!)\n";

    # buffered write errors only surface at close
    close $output or die "Can't close $path ($!)\n";

}

# done
exit;