#!/home/eric/bin/perl

# doaj2mylibrary.pl - harvest DOAJ metadata and import it into MyLibrary

# Eric Lease Morgan
# 2007-06-28 - added search for location type URL
# 2006-01-12 - added pod
# 2004-10-12 - first cut

=head1 NAME

doaj2mylibrary.pl - harvest DOAJ metadata and import it into MyLibrary

=head1 DESCRIPTION

The purpose of this program is to populate your MyLibrary database with records harvested from the OAI-PMH data repository called the Directory of Open Access Journals (DOAJ).

If it doesn't already exist, the program will automatically create a location type called URL.

If it doesn't already exist, the program will automatically create a facet called Subjects. The script will then get a list of OAI sets supported by the repository and create facet/term combinations in the form of Subjects/foobar where foobar is the name of each set.

If it doesn't already exist, the program will automatically create a facet/term combination called Formats/Journals.

Each set will then be harvested and each record in each set will be used to create a MyLibrary resource. Only very basic meta-data is recorded like title, creator, description, and identifier.

=head1 AUTHOR

Eric Lease Morgan

=cut

# include the necessary modules/subroutines
use strict;
use warnings;
use MyLibrary::Core;
use Net::OAI::Harvester;

# NOTE(review): on Perl 5.26+ the current directory is no longer in @INC,
# so this relative require may need to be './subroutines.pl' -- confirm
# against the deployment environment before changing it.
require 'subroutines.pl';

# define where the OAI interface to DOAJ is
use constant DOAJ => 'http://www.doaj.org/oai';

# display an introduction; clearScreen is expected to come from subroutines.pl
clearScreen();
print "\nIf it doesn't already exist, this program will first create a\n";
print "location type called URL. Then, if they don't exist, this program will\n";
print "first create a 'Subjects' facet. It will then create a set of subject\n";
print "terms based on the OAI sets defined by the Directory of Open Access\n";
print "Journals. It will then create a Formats facet, if it doesn't exist and\n";
print "create a related term called Journals. Finally, it will harvest the\n";
print "Directory's metadata and create MyLibrary records accordingly.\n\n";
print "Press enter (or return) to begin. ";

# wait for the user to confirm before touching the database
<STDIN>;

# initialize
my $location_type;

# see if a location type named URL exists
foreach my $type_id ( MyLibrary::Resource::Location::Type->all_types ) {

    my $type = MyLibrary::Resource::Location::Type->new( id => $type_id );
    if ( $type->name eq 'URL' ) {

        $location_type = $type->location_type_id;
        print "Location type URL exists\n";
        last;

    }

}

# if not, then create it
if ( ! $location_type ) {

    my $type = MyLibrary::Resource::Location::Type->new;
    $type->name('URL');
    $type->description('Internet pointers');
    $type->commit;
    $location_type = $type->location_type_id;
    print "Location type URL was created\n";

}

# check for a facet called Subjects
my $facet = MyLibrary::Facet->new;
if ( ! MyLibrary::Facet->get_facets( value => 'Subjects', field => 'name' ) ) {

    # create it
    $facet->facet_name('Subjects');
    $facet->facet_note('Here you will find lists of terms describing the aboutness of things');
    $facet->commit;
    print "\nThe facet Subjects was created.\n";

}
else {

    # already exists
    $facet = MyLibrary::Facet->new( name => 'Subjects' );
    print "\nThe facet Subjects already exists.\n";

}
my $facetID = $facet->facet_id;

# get and set the subject terms/sets from DOAJ
my $harvester = Net::OAI::Harvester->new( 'baseURL' => DOAJ );
my $sets      = $harvester->listSets;

# report (but do not abort on) a failed ListSets request, so that an
# unreachable repository is not mistaken for a successful empty harvest
if ( my $error_code = $sets->errorCode ) {

    warn "Unable to list DOAJ sets ($error_code): ", $sets->errorString, "\n";

}

foreach my $set_spec ( $sets->setSpecs ) {

    # fetch the name once, in scalar context, so it can never expand to an
    # empty list inside the named-argument lists below
    my $set_name = $sets->setName($set_spec);

    # check for this particular term
    if ( ! MyLibrary::Term->get_terms( value => $set_name, field => 'name' ) ) {

        # create it
        my $term = MyLibrary::Term->new;
        $term->term_name($set_name);
        $term->term_note('This term comes from the DOAJ.');
        $term->facet_id($facetID);
        $term->commit;
        print 'The term ', $set_name, " was created.\n";

    }
    else {

        # it already exists
        print 'The term ', $set_name, " already exists.\n";

    }

}

# check for a facet called Formats
$facet = MyLibrary::Facet->new;
if ( ! MyLibrary::Facet->get_facets( value => 'Formats', field => 'name' ) ) {

    # create it
    $facet->facet_name('Formats');
    $facet->facet_note('This is a list of physical formats for information resources.');
    $facet->commit;
    print "The facet Formats was created.\n";

}
else {

    # it already exists
    $facet = MyLibrary::Facet->new( name => 'Formats' );
    print "The facet Formats already exists.\n";

}
my $formatID = $facet->facet_id;

# check for term named Journals
my $term = MyLibrary::Term->new;
if ( ! MyLibrary::Term->get_terms( value => 'Journals', field => 'name' ) ) {

    # create it
    $term->term_name('Journals');
    $term->term_note('These are scholarly serial publications.');
    $term->facet_id($formatID);
    $term->commit;
    print "The term Journals was created.\n";

}
else {

    # it already exists
    $term = MyLibrary::Term->new( name => 'Journals' );
    print "The term Journals already exists.\n";

}
my $journalTermID = $term->term_id;

# loop through each OAI set from DOAJ
foreach my $set_spec ( $sets->setSpecs ) {

    # get the subject term that corresponds to this set
    print "\n$set_spec\n";
    my $set_name     = $sets->setName($set_spec);
    my $subject_term = MyLibrary::Term->new( name => $set_name );
    my $termID       = $subject_term->term_id;

    # get the records in this set
    my $records = $harvester->listAllRecords( metadataPrefix => 'oai_dc', set => $set_spec );

    # surface OAI-PMH errors for this set; processing continues as before
    if ( my $error_code = $records->errorCode ) {

        warn "Problem harvesting set $set_spec ($error_code): ", $records->errorString, "\n";

    }

    # process each record
    while ( my $record = $records->next ) {

        # extract the metadata; the OAI identifier serves as the foreign key
        my $fkey      = $record->header->identifier;
        my $metadata  = $record->metadata;
        my $name      = $metadata->title;
        my $publisher = $metadata->publisher;
        my $language  = $metadata->language;
        my $location  = $metadata->identifier;
        print "$name...";

        # look the resource up once and reuse the result in either branch
        my $resource = MyLibrary::Resource->new( fkey => $fkey );
        if ( ! $resource ) {

            # create it
            $resource = MyLibrary::Resource->new;
            $resource->name($name);
            $resource->publisher($publisher);
            $resource->language($language);
            $resource->fkey($fkey);
            $resource->related_terms( new => [ $journalTermID, $termID ] );
            $resource->add_location( location => $location, location_type => $location_type );
            $resource->commit;
            print "added (", $resource->id, ").\n";

        }
        else {

            # update it by relating it to this set's subject term as well
            $resource->related_terms( new => [$termID] );
            $resource->commit;
            print "already exists. Updated.\n";

        }

    }

}

# done
print "\nDone\n";
exit;