Fun with KinoSearch

#!/usr/bin/perl

# sru-server.cgi - access a KinoSearch index via SRU; just take advantage
#                  of the toLucene method of the parsed CQL query
#
# Eric Lease Morgan
#
# 2008-02-05 - moved back to beta space (GG died this morning)
# 2007-09-21 - added pager
# 2006-05-31 - output additional dc fields depending on the dc type
# 2006-05-30 - first cut
#
# NOTE(review): the published copy of this script had every piece of
# angle-bracket markup and every character entity stripped out. The explain
# record, the Dublin Core record markup and the entity replacements below
# were reconstructed; please compare them against the deployed script.

# require the necessary modules
use strict;
use warnings;
use CGI qw(-oldstyle_urls);
use CGI::Carp qw(fatalsToBrowser);
use CQL::Parser;
use SRU::Request;
use SRU::Response;
use KinoSearch::Searcher;
use KinoSearch::Analysis::PolyAnalyzer;

# where is my index
use constant INDEX => '../etc/index';

# initialize the necessary objects; error() reports $SRU::Error and exits
my $cgi      = CGI->new();
my $request  = SRU::Request->newFromCGI($cgi)           || error();
my $response = SRU::Response->newFromRequest($request)  || error();

# check for type of response; explain
if ( $response->type() eq 'explain' ) {

    # fill up the response's record
    $response->record(
        SRU::Response::Record->new(
            recordSchema => 'http://explain.z3950.org/dtd/2.0/',
            recordData   => record(),
        )
    );

}

# scan
elsif ( $response->type() eq 'scan' ) {

    $response->addDiagnostic(
        SRU::Response::Diagnostic->newFromCode(
            4, 'Scan operation is not supported at this stage.'
        )
    );

}

# search
else {

    # parse the query exactly once; a parse failure dies inside the eval
    my $node = eval { CQL::Parser->new->parse( $request->query ) };

    if ( my $parse_error = $@ ) {

        # report the problem as an SRU diagnostic and fall through to the
        # common output code at the bottom
        $response->addDiagnostic(
            SRU::Response::Diagnostic->newFromCode( 10, $parse_error ) );

    }
    else {

        # search
        my ( $total_hits, @results ) = search( $node->toLucene );

        # process each result, honouring the requested maximum (if any)
        my $maximum_records = $request->maximumRecords;
        for my $i ( 0 .. $#results ) {

            last if defined $maximum_records && $i >= $maximum_records;

            # create a record object and add it to the response
            $response->addRecord(
                SRU::Response::Record->new(
                    recordSchema => 'info:srw/schema/1/dc-v1.1',
                    recordData   => $results[$i],
                )
            );

        }

        # include the total number of hits
        $response->numberOfRecords($total_hits);

    }

}

# done; output the result
print $cgi->header( -type => 'text/xml', -charset => 'utf-8' );
print $response->asXML();
exit;


######################################################
# local subroutines; should I make these into methods?

# record - return the explain record for this server as an XML string.
#
# NOTE(review): element and attribute names follow the ZeeRex 2.0 explain
# schema and are a reconstruction; only the text content survived in the
# published listing. The context-set identifier in particular is assumed.
sub record {

    my $email = 'emorgan@nd.edu';

    return <<"EOF";
<explain xmlns="http://explain.z3950.org/dtd/2.0/">
  <serverInfo>
    <host>dewey.library.nd.edu</host>
    <port>80</port>
    <database>morgan/kinosearch/sru/server.cgi</database>
  </serverInfo>
  <databaseInfo>
    <title lang="en" primary="true">Fun with KinoSearch</title>
    <description lang="en" primary="true">This is just a test to see if I can provide SRU access to a KinoSearch index.</description>
    <author>Eric Lease Morgan, University Libraries of Notre Dame</author>
    <contact>Eric Lease Morgan ($email)</contact>
    <extent>The index contains content from DOAJ, Infomotions' Image Gallery, and a number of MARC records describing 18th century stuff.</extent>
    <langUsage codes="en">All the records in this database are in English.</langUsage>
    <implementation>
      <title>This SRU server is written in Perl through the use of three API's. One for SRU (http://search.cpan.org/dist/SRU/). One for CQL (http://search.cpan.org/dist/CQL-Parser/), and one for the underlying indexer/search engine, KinoSearch (http://www.kinosearch.com/kinosearch/).</title>
    </implementation>
    <links>
      <link type="www">http://dewey.library.nd.edu/morgan/kinosearch/sru/client.html</link>
      <link type="sru">http://dewey.library.nd.edu/morgan/kinosearch/sru/server.cgi</link>
    </links>
  </databaseInfo>
  <metaInfo>
    <dateModified>2006-05-30 18:12:04</dateModified>
  </metaInfo>
  <indexInfo>
    <set identifier="info:srw/cql-context-set/1/dc-v1.1" name="dc" />
    <index>
      <title>subject</title>
      <map><name set="dc">subject</name></map>
    </index>
    <index>
      <title>related</title>
      <map><name set="dc">related</name></map>
    </index>
    <index>
      <title>creator</title>
      <map><name set="dc">creator</name></map>
    </index>
    <index>
      <title>publisher</title>
      <map><name set="dc">publisher</name></map>
    </index>
    <index>
      <title>title</title>
      <map><name set="dc">title</name></map>
    </index>
  </indexInfo>
  <schemaInfo>
    <schema identifier="info:srw/schema/1/dc-v1.1" sort="false" name="dc">
      <title>Dublin Core</title>
    </schema>
  </schemaInfo>
  <configInfo>
    <default type="numberOfRecords">250</default>
  </configInfo>
</explain>
EOF

}


# search - run a query against the KinoSearch index.
#
# Takes one argument, the query string (the Lucene-style rendering of the
# CQL query). Reads startRecord and maximumRecords from the file-level
# $request object. Returns a list whose first element is the total number
# of hits and whose remaining elements are XML strings, one per hit in the
# requested window.
sub search {

    # initialize
    my ($query) = @_;
    my @results;

    # hit field => element name, in output order
    # NOTE(review): the element names were lost in the published listing and
    # are inferred from the hit field names; 'collection' has no Dublin Core
    # element of the same name, so dc:source is a guess -- confirm.
    my @dc_elements = (
        [ title       => 'dc:title' ],
        [ creator     => 'dc:creator' ],
        [ description => 'dc:description' ],
        [ identifier  => 'dc:identifier' ],
        [ subject     => 'dc:subject' ],
        [ publisher   => 'dc:publisher' ],
        [ collection  => 'dc:source' ],
        [ date        => 'dc:date' ],
        [ format      => 'dc:format' ],
        [ rights      => 'dc:rights' ],
    );

    # set up KinoSearch
    my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
    my $searcher = KinoSearch::Searcher->new(
        invindex => INDEX,
        analyzer => $analyzer,
    );

    # search
    my $hits       = $searcher->search($query);
    my $total_hits = $hits->total_hits;

    # define start and end; SRU numbers records from 1 while the hits
    # iterator is positioned with a zero-based offset, so shift by one
    # (passing startRecord through unchanged skipped the first wanted hit)
    my $start_record = $request->startRecord;
    my $start_offset = $start_record && $start_record > 1
                     ? $start_record - 1
                     : 0;
    my $maximum_records = $request->maximumRecords || $total_hits;

    if ($total_hits) {

        $hits->seek( $start_offset, $maximum_records );
        while ( my $hit = $hits->fetch_hit_hashref ) {

            # build one Dublin Core record and push it on to the results
            my $record =
                '<srw_dc:dc xmlns:srw_dc="info:srw/schema/1/dc-schema"'
              . ' xmlns:dc="http://purl.org/dc/elements/1.1/">';
            for my $pair (@dc_elements) {
                my ( $field, $element ) = @{$pair};
                $record .= "<$element>"
                  . escape_entities( $hit->{$field} )
                  . "</$element>";
            }
            $record .= '</srw_dc:dc>';

            push @results, $record;

        }

    }

    # done; return it
    return ( $total_hits, @results );

}


# escape_entities - make a string safe for inclusion in XML.
#
# Takes a string and returns a copy with the five predefined XML characters
# replaced by their entities. An undefined value (a field missing from a
# hit) yields the empty string instead of raising warnings.
sub escape_entities {

    # get the input
    my ($s) = @_;
    return '' unless defined $s;

    # escape; the ampersand must go first so the entities produced by the
    # later substitutions are not escaped a second time
    $s =~ s/&/&amp;/g;
    $s =~ s/</&lt;/g;
    $s =~ s/>/&gt;/g;
    $s =~ s/"/&quot;/g;
    $s =~ s/'/&apos;/g;

    # done
    return $s;

}


# error - report a failure to build the SRU request or response objects by
# printing $SRU::Error after a default CGI header, then stop.
sub error {

    print $cgi->header();
    print $SRU::Error;
    exit;

}