Date: Thu, 14 Jan 1993 06:58:47 +0200
From: Ari Lemmke <arl@cs.hut.fi>
Organization: Helsinki University of Technology, Finland.

You might have a tool to take http addresses out of text files or mail, but not all of us have NeXTs ;-)
This might be useful for someone.
arl

#!/usr/bin/perl
#
# get_www
#
# Get http addresses out of mails [mailing list] or text files.
# Standard mailbox format is used for input.
#
# Use:
# perl get_www < mailbox > foo.html
# www foo.html
#
# This script outputs:
# * the mail Subject: line content as an "H1" header,
# * as the "P" text, the 3 lines before the line containing
# the http address, that line itself, and the 2 lines after it,
# * and the http:// address as the anchor text, so
# you can check directly whether it is a valid one.
#
# Kludges: The regexps could be squeezed ;-), but then they would
# be hard to read.
# The name of this script could be nicer, but this
# is just another perl kludge anyway.
#
# @ arl // 1993
#
# _html_escape TEXT
#
# Return TEXT with the characters that are special in HTML
# (&, <, > and ") replaced by entity references, so that mail text
# is shown literally instead of being parsed as markup.
sub _html_escape
{
    my ($text) = @_;

    # '&' must be handled first, otherwise the entities produced by the
    # later substitutions would be escaped a second time.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;

    return $text;
}

# analyze LINES
#
# Scan one mail message and print an HTML fragment for every line that
# contains at least one http:// address.
#
# Arguments:
#   LINES - the lines of a single message, without trailing newlines.
#
# For each matching line the following is printed to the currently
# selected output handle, one item per line:
#   * "<H1>subject</H1>", using the most recent "Subject:" line seen so
#     far in this message (empty if none has been seen),
#   * "<P>",
#   * up to 3 lines before the matching line, the line itself and up to
#     2 lines after it; the window is clamped to the message, so it never
#     wraps around to the other end and never yields undefined lines,
#   * one '<A HREF="url" >url</A>' anchor per address on the line.
#
# An address starts at "http://" and runs up to the next whitespace,
# double quote or angle bracket.  A bare "http://" with nothing after it
# is not treated as an address.  All mail text is HTML-escaped.
#
# Returns nothing useful; the result is the printed output.
sub analyze
{
    my (@mymail) = @_;
    my $subject  = '';
    my @save     = ();

    for my $i (0 .. $#mymail)
    {
        my $line = $mymail[$i];

        # Remember the latest subject, minus the header name and the
        # blanks that follow it.
        if ( $line =~ /^Subject:/ )
        {
            ( $subject = $line ) =~ s/^Subject:[\t ]*//;
        }

        # Collect every address on the line, not just the last one.
        my @urls = ( $line =~ m{(http://[^\s<>"]+)}g );
        next unless @urls;

        # Clamp the context window to the bounds of the message.  A
        # negative start index would otherwise count from the end of the
        # array, and an end index past the last line would add undefs.
        my $first = $i - 3;
        $first = 0 if $first < 0;
        my $last = $i + 2;
        $last = $#mymail if $last > $#mymail;

        push( @save, "<H1>" . _html_escape($subject) . "</H1>" );
        push( @save, "<P>" );
        push( @save, map { _html_escape($_) } @mymail[ $first .. $last ] );

        foreach my $url (@urls)
        {
            my $safe = _html_escape($url);
            push( @save, "<A HREF=\"" . $safe . "\" >" . $safe . "</A>" );
        }
    }

    foreach my $out (@save)
    {
        print $out, "\n";
    }
}
# Main program: read a standard mailbox from the files named on the
# command line (or from standard input), split it into messages at each
# line starting with "From ", and hand every message to analyze().
my @mail = ();

while ( my $line = <> )
{
    # chomp removes only a trailing newline.  chop would also eat the
    # last real character of a final line that has no newline, which
    # could truncate an address.
    chomp $line;

    # A "From " line starts a new message: flush whatever was collected
    # before it.  Nothing is flushed for the very first line of input.
    if ( @mail && $line =~ /^From / )
    {
        analyze(@mail);
        @mail = ();
    }

    push( @mail, $line );
}

# Flush the last message (a no-op on empty input, since analyze() then
# has no lines to scan).
analyze(@mail);