#!/usr/bin/perl -wT # # Extract RDF info from images # by plh@w3.org # (c) 2005 World Wide Web Consortium # # Largely based on code from the W3C link checker: # http://validator.w3.org/checklink # # $Id: img2rdf.pl,v 1.6 2008/03/27 12:56:28 plehegar Exp $ # # This program is licensed under the W3C(r) Software License: # http://www.w3.org/Consortium/Legal/copyright-software # # The documentation is as follows: # imgGrrdl # # An online version is not yet available # # Comments and suggestions could be sent to plh@w3.org use strict; # Get rid of potentially unsafe and unneeded environment variables. delete(@ENV{qw(IFS CDPATH ENV BASH_ENV)}); $ENV{PATH} = ''; # undef would output warnings with Perl 5.6.1's Cwd.pm. # ----------------------------------------------------------------------------- package W3C::UserAgent; use LWP::RobotUA 1.19 qw(); BEGIN { unshift @INC, "/usr/local/Image-ExifTool/lib"; } use Image::ExifTool qw{:Public :Vars}; @W3C::UserAgent::ISA = qw(LWP::RobotUA); sub new { my $proto = shift; my $class = ref($proto) || $proto; my ($name, $from, $rules) = @_; # For security/privacy reasons, if $from was not given, do not send it. # Cheat by defining something for the constructor, and resetting it later. my $from_ok = $from; $from ||= 'nowhere@w3.org'; # WWW::RobotRules <= 5.78 have bugs which cause suboptimal results with # User-Agent substring matching against robots.txt files; "User-Agent: *" # should work ok with all though, and "User-Agent: W3C-checklink" for >= 5.77 my $self = $class->SUPER::new($name, $from, $rules); $self->from(undef) unless $from_ok; $self->env_proxy(); return $self; } sub simple_request { my $self = shift; my $response = $self->W3C::UserAgent::SUPER::simple_request(@_); if (! defined($self->{FirstResponse})) { $self->{FirstResponse} = $response->code(); $self->{FirstMessage} = $response->message() || '(no message)'; } return $response; } sub redirect_ok { my ($self, $request, $response) = @_; if ($self->{Checklink_verbose_progress}) { # @@@ TODO: when an LWP internal robots.txt request gets redirected, # this will a bit confusingly print out info about it. Would need a # robust way of determining whether something is a LWP "internal" request. &W3C::IMG2RDF::hprintf("\n%s %s ", $request->method(),$request->uri()); } return 0 unless $self->SUPER::redirect_ok($request, $response); if (my $res = &W3C::IMG2RDF::ip_allowed($request->uri())) { $response->previous($response->clone()); $response->request($request); $response->code($res->code()); $response->message($res->message()); return 0; } return 1; } # ----------------------------------------------------------------------------- package W3C::IMG2RDF; use vars qw($AGENT $PACKAGE $PROGRAM $VERSION $REVISION $DocType $Head $Accept $ContentTypes %Cfg); use HTML::Entities qw(); use HTTP::Request qw(); use HTTP::Response qw(); use Time::HiRes qw(); use URI qw(); use URI::Escape qw(); use URI::file qw(); # @@@ Needs also W3C::UserAgent but can't use() it here. use constant RC_ROBOTS_TXT => -1; use constant RC_DNS_ERROR => -2; BEGIN { # Version info $PACKAGE = 'Image to RDF'; $PROGRAM = 'imgGrrdl'; $VERSION = '0.0.1'; $REVISION = sprintf('version %s (c) 2005 W3C', $VERSION); my ($cvsver) = q$Revision: 1.6 $ =~ /(\d+[\d\.]*\.\d+)/; $AGENT = sprintf('%s/%s [%s] %s', $PROGRAM, $VERSION, $cvsver, LWP::RobotUA->_agent()); # Pull in mod_perl modules if applicable. if ($ENV{MOD_PERL}) { eval "require Apache2::compat"; # For mod_perl 2 require Apache; } my @content_types = qw(video/avi image/bmp application/postscript application/dicom image/gif image/jng image/jpeg2000 image/jpeg application/x-magick-image video/mng video/quicktime audio/mpeg video/mp4 image/x-portable-bitmap application/pdf image/x-portable-graymap image/pict image/png image/x-portable-pixmap application/postscript application/photoshop image/x-quicktime image/tiff audio/x-wav application/xmp); $Accept = join(', ', @content_types) . ', */*;q=0.5'; my $re = join('|', map { s/\+/\\+/g; $_ } @content_types); $ContentTypes = qr{\b(?:$re)\b}io; # # Read configuration. If the W3C_CHECKLINK_CFG environment variable has # been set or the default contains a non-empty file, read it. Otherwise, # skip silently. # my $defaultconfig = '/etc/w3c/checklink.conf'; if ($ENV{W3C_CHECKLINK_CFG} || -s $defaultconfig) { require Config::General; Config::General->require_version(2.06); # Need 2.06 for -SplitPolicy my $conffile = $ENV{W3C_CHECKLINK_CFG} || $defaultconfig; eval { my %config_opts = ( -ConfigFile => $conffile, -SplitPolicy => 'equalsign', -AllowMultiOptions => 'no', ); %Cfg = Config::General->new(%config_opts)->getall(); }; if ($@) { die <<".EOF."; Failed to read configuration from '$conffile': $@ .EOF. } } # Trusted environment variables that need laundering in taint mode. foreach (qw(NNTPSERVER NEWSHOST)) { ($ENV{$_}) = ($ENV{$_} =~ /^(.*)$/) if $ENV{$_}; } # Use passive FTP by default, see Net::FTP(3). $ENV{FTP_PASSIVE} = 1 unless exists($ENV{FTP_PASSIVE}); } # Autoflush $| = 1; # Different options specified by the user my $cmdline = ! ($ENV{GATEWAY_INTERFACE} && $ENV{GATEWAY_INTERFACE} =~ /^CGI/); my %Opts = ( Command_Line => $cmdline, Quiet => 0, Verbose => 0, Timeout => 60, Redirects => 1, Dir_Redirects => 1, HTTP_Proxy => undef, Sleep_Time => 1, User => undef, Password => undef, Trusted => $Cfg{Trusted}, Allow_Private_IPs => defined($Cfg{Allow_Private_IPs}) ? $Cfg{Allow_Private_IPs} : $cmdline, ); undef $cmdline; unless ($Opts{Allow_Private_IPs}) { eval { require Net::IP; require Socket; Socket->import('inet_ntoa'); require Net::hostent; }; if ($@) { die <<".EOF."; Allow_Private_IPs is false; this feature requires the Net::IP, Socket, and Net::hostent modules: $@ .EOF. } } # Global variables # What is our query? my $query; # List of redirects my %redirects; # Count of the number of documents checked my $doc_count = 0; # Time stamp my $timestamp = &get_timestamp(); &parse_arguments() if $Opts{Command_Line}; # Precompile/error-check regular expressions. if (defined($Opts{Exclude_Docs})) { eval { $Opts{Exclude_Docs} = qr/$Opts{Exclude_Docs}/o; }; &usage(1, "Error in exclude-docs regexp: $@") if $@; } if (defined($Opts{Trusted})) { eval { $Opts{Trusted} = qr/$Opts{Trusted}/io; }; &usage(1, "Error in trusted domains regexp: $@") if $@; } my $ua = W3C::UserAgent->new($AGENT); # @@@ TODO: admin address # @@@ make number of keep-alive connections customizable $ua->conn_cache({ total_capacity => 1}); # 1 keep-alive connection $ua->delay($Opts{Sleep_Time}/60); $ua->timeout($Opts{Timeout}); $ua->proxy('http', 'http://' . $Opts{HTTP_Proxy}) if $Opts{HTTP_Proxy}; if ($Opts{Command_Line}) { require Text::Wrap; Text::Wrap->import('wrap'); &usage(1) unless @ARGV; &ask_password() if ($Opts{User} && !$Opts{Password}); my $uri = $ARGV[0]; if (!$Opts{Summary_Only}) { # printf("%s %s\n", $PACKAGE, $REVISION); } else { $Opts{Verbose} = 0; $Opts{Progress} = 0; } # Transform the parameter into a URI $uri = &urize($uri); &image2RDF($uri, 0); } else { require CGI; require CGI::Carp; CGI::Carp->import(qw(fatalsToBrowser)); $query = new CGI; # Set a few parameters in CGI mode $Opts{Verbose} = 0; $Opts{Progress} = 0; $Opts{_Self_URI} = $query->url(-relative => 1); # Backwards compatibility my $uri = $query->param('uri'); if (! $uri) { &rdf_header(); &empty_RDF($query); exit; } $Opts{Accept_Language} = undef if $query->param('no_accept_language'); undef $query; # Not needed any more. # All Apache configurations don't set HTTP_AUTHORIZATION for CGI scripts. # If we're under mod_perl, there is a way around it... if ($ENV{MOD_PERL}) { my $auth = Apache->request()->header_in('Authorization'); $ENV{HTTP_AUTHORIZATION} ||= $auth if $auth; } $uri =~ s/^\s+//g; if ($uri =~ m/^file:/) { # Only the http scheme is allowed &file_uri($uri); } elsif ($uri !~ m/:/) { if ($uri =~ m|^//|) { $uri = 'http:'.$uri; } else { $uri = 'http://'.$uri; } } &image2RDF($uri, 1); } ############################################################################### ################################ # Command line and usage stuff # ################################ sub parse_arguments () { require Getopt::Long; Getopt::Long->require_version(2.17); Getopt::Long->import('GetOptions'); Getopt::Long::Configure('bundling', 'no_ignore_case'); my $masq = ''; GetOptions('help|h|?' => sub { usage(0) }, 'q|quiet' => sub { $Opts{Quiet} = 1; $Opts{Summary_Only} = 1; }, 'b|broken' => sub { $Opts{Redirects} = 0; $Opts{Dir_Redirects} = 0; }, 'e|dir-redirects' => sub { $Opts{Dir_Redirects} = 0; }, 'v|verbose' => \$Opts{Verbose}, 'l|location=s' => \$Opts{Base_Location}, 'u|user=s' => \$Opts{User}, 'p|password=s' => \$Opts{Password}, 't|timeout=i' => \$Opts{Timeout}, 'S|sleep=i' => \$Opts{Sleep_Time}, 'd|domain=s' => \$Opts{Trusted}, 'V|version' => \&version ) || usage(1); if ($Opts{Accept_Language} && $Opts{Accept_Language} eq 'auto') { $Opts{Accept_Language} = &guess_language(); } if (($Opts{Sleep_Time} || 0) < 1) { warn("*** Warning: minimum allowed sleep time is 1 second, resetting.\n"); $Opts{Sleep_Time} = 1; } } sub version () { print "$PACKAGE $REVISION\n"; exit 0; } sub usage () { my ($exitval, $msg) = @_; $exitval = 0 unless defined($exitval); $msg ||= ''; $msg =~ s/[\r\n]*$/\n\n/ if $msg; die($msg) unless $Opts{Command_Line}; my $trust = defined($Cfg{Trusted}) ? $Cfg{Trusted} : 'same host only'; select(STDERR) if $exitval; print "$msg$PACKAGE $REVISION Usage: imgGrrdl Options: -q, --quiet No output if no errors are found (implies -s). -v, --verbose Verbose mode. -u, --user USERNAME Specify a username for authentication. -p, --password PASSWORD Specify a password. -t, --timeout SECS Timeout for requests (in seconds). -d, --domain DOMAIN Regular expression describing the domain to which authentication information will be sent (default: $trust). -?, -h, --help Show this message and exit. -V, --version Output version information and exit. See \"perldoc LWP\" for information about proxy server support, \"perldoc Net::FTP\" for information about various environment variables affecting FTP connections and \"perldoc Net::NNTP\" for setting a default NNTP server for news: URIs. The W3C_CHECKLINK_CFG environment variable can be used to set the configuration file to use. See details in the full manual page, it can be displayed with: perldoc checklink Please send bug reports and comments to: plh\@w3.org "; exit $exitval; } sub ask_password () { eval { local $SIG{__DIE__}; require Term::ReadKey; Term::ReadKey->require_version(2.00); Term::ReadKey->import(qw(ReadMode)); }; if ($@) { warn('Warning: Term::ReadKey 2.00 or newer not available, ' . "password input disabled.\n"); return; } printf(STDERR 'Enter the password for user %s: ', $Opts{User}); ReadMode('noecho', *STDIN); chomp($Opts{Password} = ); ReadMode('restore', *STDIN); print(STDERR "ok.\n"); } ############################################################################### ########################################################################### # Guess an Accept-Language header based on the $LANG environment variable # ########################################################################### sub guess_language () { my $lang = $ENV{LANG} or return undef; $lang =~ s/[\.@].*$//; # en_US.UTF-8, fi_FI@euro... return 'en' if ($lang eq 'C' || $lang eq 'POSIX'); my $res = undef; eval { require Locale::Language; if (my $tmp = Locale::Language::language2code($lang)) { $lang = $tmp; } if (my ($l, $c) = (lc($lang) =~ /^([a-z]+)(?:[-_]([a-z]+))?/)) { if (Locale::Language::code2language($l)) { $res = $l; if ($c) { require Locale::Country; $res .= "-$c" if Locale::Country::code2country($c); } } } }; return $res; } ########################################### # Transform foo into file://localhost/foo # ########################################### sub urize ($) { my $u = URI->new_abs(URI::Escape::uri_unescape($_[0]), URI::file->cwd()); return $u->as_string(); } ######################################## # Check for broken links in a resource # ######################################## sub image2RDF ($$$;$) { my ($uri, $header) = @_; my $start = &get_timestamp() unless $Opts{Quiet}; # Get and parse the document my $response = &get_image('GET', $uri, \%redirects); # Can we check the resource? If not, we exit here... return -1 if defined($response->{Stop}); my $absolute_uri = $response->{absolute_uri}->as_string(); my $result_anchor = 'results'.$doc_count; my $esc_uri = URI::Escape::uri_escape($absolute_uri, "^A-Za-z0-9."); my $rdf = &generateRDF($uri, $absolute_uri, $response->content()); if ($header) { print "Content-Type: application/rdf+xml; charset=utf-8\n"; print "Content-Length: ".length($rdf)."\n\n"; } print $rdf; return 0; } sub generateRDF ($$$;\%) { my ($uri, $location, $imageData) = @_; my $exiftool = new Image::ExifTool; my $outBuf = ''; my $value = ''; $exiftool->SetNewValuesFromFile(\$imageData); $exiftool->WriteInfo(undef, \$outBuf, 'XMP'); $outBuf = clean_XMP($outBuf, $location); $value = $exiftool->GetValue("XMP", "Raw"); if ($value) { # do something $outBuf .= clean_XMP($value, $location); } # ok, now we have a clean outBuf, let's see if we can add more info $exiftool->ExtractInfo(\$imageData); my @foundTags = $exiftool->GetFoundTags('File'); my @tagList = Image::ExifTool::GetWritableTags(); my %is_writable; for (@tagList) { $is_writable{$_} = 1 } my $extra = ""; foreach (@foundTags) { if ($is_writable{$_} || / $[0-9]$/) { # skip, it's a duplicate anyway } elsif ($exiftool->GetValue($_)) { my $value = $exiftool->GetValue($_); if ($_ eq "MIMEType") { $_ = $outBuf; if (! //) { $extra .= " ".$value."\n"; } } elsif (ref $value) { # skip, it's something strange } else { # print $_.": ".$value."\n"; } } } $value = $exiftool->GetValue("Copyright"); if ($value) { my $group = $exiftool->GetGroup("Copyright"); if ($group eq "ICC_Profile") { # issue with exitTool 5.87. Remove the Copyright information $outBuf =~ s|[ ]*[ \n]+[ \n]+[^<]+[ \n]+[ \n]+\n*||; $value = $exiftool->GetValue("Copyright (1)"); } $_ = $outBuf; if ($value and ! /GetValue("Keywords", "PrintConv"); if ($keywords) { $outBuf =~ s|([^<]+)|$keywords|; $_ = $outBuf; if (! /GetValue("Keywords", "ValueConv"); my $buf = " \n \n"; foreach (@keywords) { $buf .= " $_\n" } $buf .= " \n \n"; $extra .= $buf; } } if (length($extra) > 0) { $outBuf .= " ".$extra." \n"; } my @fTags; push @fTags, "XMP"; my $info = $exiftool->ImageInfo(\$imageData, \@fTags); return " ".$outBuf."\n"; } sub clean_XMP($$$;\%) { my ($xmp, $location) = @_; # clean up $xmp =~ s/<\?xpacket[^>]+>//g; $xmp =~ s/]+>//g; $xmp =~ s|||g; $xmp =~ s|[ \n]*[ \n]*[ \n]*[ \n]*||; $xmp =~ s|||; $_ = $xmp; my $xmlns = ''; while (/(xmlns:[^=]+='[^']+')/ || /(xmlns:[^=]+="[^"]+")/) { my $rest = $'; $_ = $1; if (! /^xmlns:rdf=/ && ! /^xmlns:x=/) { $xmlns.="\n ".$_; } $_ = $rest; } $xmp =~ s/]+>//g; $xmp =~ s|||g; $xmp =~ s/]+>//g; $xmp =~ s|||g; $xmp =~ s/\s+\n/\n/g; $xmp =~ s/\n\n+/\n/g; $xmp =~ s|([^<]+)||; $xmp =~ s|([^<]+)||; $xmp =~ s|([^<]+)||; $xmp =~ s|([^<]+)||; $xmp =~ s|>[^<]+[^<]+([^<]+)[^<]+[^<]+<| xml:lang="$1">$2<|mg; $xmp =~ s|>[^<]+[^<]+([^<]+)[^<]+[^<]+<|>$1<|mg; $xmp =~ s|>[^<]+[^<]+([^<]+)[^<]+[^<]+<|>$1<|mg; return " ".$xmp." \n"; } ####################################### # Get and parse a resource to process # ####################################### sub get_image ($$$;\%) { my ($method, $uri, $redirects) = @_; # $method contains the HTTP method the use (GET or HEAD) # $uri contains the identifier of the resource # $redirects is a pointer to the hash containing the map of the redirects # Get the resource my $response; $response = &get_uri($method, $uri); if (! $response->is_success()) { if ($response->code() == 401) { &authentication($response); } else { &hprintf("\nError: %d %s\n", $response->code(), $response->message() || '(no message)'); } $response->{Stop} = 1; return($response); } # What is the URI of the resource that we are processing by the way? my $base_uri = URI->new($response->base()); my $request_uri = URI->new($response->request->url); $response->{absolute_uri} = $request_uri->abs($base_uri); # Can we parse the document? my $failed_reason; my $ct = $response->header('Content-Type'); my $ce = $response->header('Content-Encoding'); if (!$ct || $ct !~ $ContentTypes) { $failed_reason = "Content-Type for <$request_uri> is " . (defined($ct) ? "'$ct'" : 'undefined'); } elsif (defined($ce) && $ce ne 'identity') { # @@@ We could maybe handle gzip... $failed_reason = "Content-Encoding for <$request_uri> is '$ce'"; } if ($failed_reason) { # No, there is a problem... &hprintf("Can't generate RDF from %s.\n", $failed_reason); $response->{Stop} = 1; } # Ok, return the information return($response); } ############################ # Get the content of a URI # ############################ sub get_uri ($$;$\%$$$$) { # Here we have a lot of extra parameters in order not to lose information # if the function is called several times (401's) my ($method, $uri, $start, $redirects, $code, $realm, $message, $auth) = @_; # $method contains the method used # $uri contains the target of the request # $start is a timestamp (not defined the first time the function is # called) # $redirects is a map of redirects # $code is the first HTTP return code # $realm is the realm of the request # $message is the HTTP message received # $auth equals 1 if we want to send out authentication information # For timing purposes $start = &get_timestamp() unless defined($start); # Prepare the query my $request = new HTTP::Request($method, $uri); $request->header('Accept-Language' => $Opts{Accept_Language}) if $Opts{Accept_Language}; $request->header('Accept', $Accept); # Are we providing authentication info? if ($auth && $request->url()->host() =~ $Opts{Trusted}) { if (defined($ENV{HTTP_AUTHORIZATION})) { $request->headers->header(Authorization => $ENV{HTTP_AUTHORIZATION}); } elsif (defined($Opts{User}) && defined($Opts{Password})) { $request->authorization_basic($Opts{User}, $Opts{Password}); } } # Check if the IP address is allowed. my $response = &ip_allowed($request->uri()); return $response if $response; # Do the query $response = $ua->request($request); # Get the results # Record the very first response if (! defined($code)) { ($code, $message) = delete(@$ua{qw(FirstResponse FirstMessage)}); } # Authentication requested? if ($response->code() == 401 && !defined($auth) && (defined($ENV{HTTP_AUTHORIZATION}) || (defined($Opts{User}) && defined($Opts{Password})))) { # Set host as trusted domain unless we already have one. if (!$Opts{Trusted}) { my $re = sprintf('^%s$', quotemeta($response->base()->host())); $Opts{Trusted} = qr/$re/io; } # Deal with authentication and avoid loops if (! defined($realm)) { $response->headers->www_authenticate =~ /Basic realm=\"([^\"]+)\"/; $realm = $1; } return &get_uri($method, $response->request()->url(), $start, $redirects, $code, $realm, $message, 1); } $response->{Realm} = $realm if defined($realm); return $response; } sub escape_match ($\%) { my ($a, $hash) = (URI::Escape::uri_unescape($_[0]), $_[1]); foreach my $b (keys %$hash) { return 1 if ($a eq URI::Escape::uri_unescape($b)); } return 0; } ########################## # Ask for authentication # ########################## sub authentication ($) { my $r = $_[0]; $r->headers->www_authenticate =~ /Basic realm=\"([^\"]+)\"/; my $realm = $1; $realm = '' unless defined($realm); if ($Opts{Command_Line}) { printf STDERR <request()->url(), $realm; Authentication is required for %s. The realm is "%s". Use the -u and -p options to specify a username and password and the -d option to specify trusted domains. EOF } else { printf("Status: 401 Authorization Required\nWWW-Authenticate: %s\nConnection: close\nContent-Type: application/rdf+xml; charset=utf-8\n\n", $r->headers->www_authenticate); print " "; if ($Opts{Trusted}) { printf < EOF } print "\n"; } } ################## # Get statistics # ################## sub get_timestamp () { return pack('LL', Time::HiRes::gettimeofday()); } sub time_diff ($$) { my @start = unpack('LL', $_[0]); my @stop = unpack('LL', $_[1]); for ($start[1], $stop[1]) { $_ /= 1_000_000; } return(sprintf("%.1f", ($stop[0]+$stop[1])-($start[0]+$start[1]))); } ######################## # Handle the redirects # ######################## # Record the redirects in a hash sub record_redirects (\%$) { my ($redirects, $response) = @_; for (my $prev = $response->previous(); $prev; $prev = $prev->previous()) { $redirects->{$prev->request()->url()} = $response->request()->url(); } } # Determine if a request is redirected sub is_redirected ($%) { my ($uri, %redirects) = @_; return(defined($redirects{$uri})); } # Get a list of redirects for a URI sub get_redirects ($%) { my ($uri, %redirects) = @_; my @history = ($uri); my %seen = ($uri => 1); # for tracking redirect loops my $loop = 0; while ($redirects{$uri}) { $uri = $redirects{$uri}; push(@history, $uri); if ($seen{$uri}) { $loop = 1; last; } else { $seen{$uri}++; } } return ($loop, @history); } #################################################### # Tool for sorting the unique elements of an array # #################################################### sub sort_unique (@) { my %saw; @saw{@_} = (); return (sort { $a <=> $b } keys %saw); } # # Checks whether we're allowed to retrieve the document based on it's IP # address. Takes an URI object and returns a HTTP::Response containing the # appropriate status and error message if the IP was disallowed, undef # otherwise. URIs without hostname or IP address are always allowed, # including schemes where those make no sense (eg. data:, often javascript:). # sub ip_allowed ($) { my ($uri) = @_; return undef if $Opts{Allow_Private_IPs}; # Short-circuit my $hostname = undef; eval { $hostname = $uri->host() }; # Not all URIs implement host()... return undef unless $hostname; my $addr = my $iptype = my $resp = undef; if (my $host = Net::hostent::gethostbyname($hostname)) { $addr = inet_ntoa($host->addr()) if $host->addr(); if ($addr && (my $ip = Net::IP->new($addr))) { $iptype = $ip->iptype(); } } if ($iptype && $iptype ne 'PUBLIC') { $resp = HTTP::Response->new(403, 'Checking non-public IP address disallowed by img2rdf configuration'); } return $resp; } ############################################################################### ################ # Global stats # ################ sub global_stats () { my $stop = &get_timestamp(); my $n_docs = ($doc_count <= $Opts{Max_Documents}) ? $doc_count : $Opts{Max_Documents}; return sprintf('Checked %d document%s in %s seconds.', $n_docs, ($n_docs == 1) ? '' : 's', &time_diff($timestamp, $stop)); } ################## # RDF interface # ################## sub rdf_header () { # mod_perl 1.99_05 doesn't seem to like if the "\n\n" isn't in the same # print() statement as the last header... my $headers = ''; if (! $Opts{Command_Line}) { $headers .= "Content-Type: application/rdf+xml; charset=utf-8\n\n"; } print $headers; } sub file_uri ($) { my ($uri) = @_; &rdf_header(); empty_RDF(); exit; } sub empty_RDF () { print " "; } sub encode (@) { return HTML::Entities::encode(@_); } sub hprintf (@) { if (! $Opts{HTML}) { printf(@_); } else { print HTML::Entities::encode(sprintf($_[0], @_[1..@_-1])); } } # Local Variables: # mode: perl # indent-tabs-mode: nil # tab-width: 2 # perl-indent-level: 2 # End: # ex: ts=2 sw=2 et