Date: Thu, 14 Jan 1993 06:58:47 +0200
From: Ari Lemmke <arl@cs.hut.fi>
Organization: Helsinki University of Technology, Finland.


You might have a tool to take http addresses out of text files or mail, but not all of us have NeXTs ;-)

This might be useful for someone.

arl
#!/usr/bin/perl
#
#	get_www
#
#	Get http addresses out of mails [mailing list] or text files.
#	Standard mailbox format is used for input.
#
#	Use:
#		perl get_www < mailbox > foo.html
#		www foo.html
#
#	This script outputs:
#		* the mail Subject: line content as "H1" header,
#		* as the "P" text: the 3 lines before the line containing
#		  the http address, that line itself, and the 2 lines after it,
#		* and the http:// address as the anchor text, so
#		  you can check out directly if it is a valid one.
#
#	Kludges: Regexps could be squeezed ;-), but then it would
#		 be hard to read them.
#		 The name for this script could be nice, but this
#		 is another perl-kludge anyway.
#
#	@ arl // 1993
#

#
#	_html_escape( $text )
#
#	Return a copy of $text with the HTML-special characters
#	& < > " replaced by entities, so that text taken from a mail
#	cannot inject or break markup in the generated page.
#
sub
_html_escape
{
	my($text) = @_;

	$text =~ s/&/&amp;/g;		# first, or the others get re-escaped
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	$text =~ s/"/&quot;/g;

	return $text;
}

#
#	analyze( @lines )
#
#	@lines is one message, one element per line, without line
#	terminators.  For every line that contains at least one
#	http:// address this prints (to the currently selected
#	filehandle, one item per output line):
#		* "<H1>" with the most recent Subject: seen so far,
#		* "<P>" followed by up to 3 lines before the matching
#		  line, the line itself, and up to 2 lines after it.
#		  The window is clamped to the message, so a match near
#		  the top never wraps around to the end of the message
#		  and a match near the bottom never yields empty lines.
#		* one "<A HREF=...>" anchor per address on the line.
#	Subject, context lines and addresses are HTML-escaped.
#	The return value is not meaningful.
#
sub
analyze
{
	my(@mymail)	= @_;
	my(@save)	= ();
	my($subject)	= '';

	foreach my $i ( 0 .. $#mymail )
	{
		my $line = $mymail[$i];

		if ( $line =~ /^Subject:/ )
		{
			$subject = $line;
			$subject =~ s/^Subject:[\t ]*//;
		}

		# Collect every address on the line.  An address ends at
		# whitespace, an angle bracket or a double quote; sentence
		# punctuation glued to its end is not part of it.
		my @urls = ();
		foreach my $found ( $line =~ m{(http://[^\s<>"]+)}g )
		{
			my $url = $found;
			$url =~ s/[.,;:!?)']+$//;
			# A bare "http://" with nothing after it is not an address.
			push( @urls, $url ) if length($url) > length('http://');
		}
		next unless @urls;

		# Context window, clamped to the bounds of this message.
		# (A negative array index would count from the end.)
		my $first = $i - 3;
		$first = 0 if $first < 0;
		my $final = $i + 2;
		$final = $#mymail if $final > $#mymail;

		push( @save, "<H1>" . _html_escape($subject) . "</H1>" );
		push( @save, "<P>" );
		foreach my $context ( @mymail[$first .. $final] )
		{
			push( @save, _html_escape($context) );
		}
		foreach my $url ( @urls )
		{
			my $href = _html_escape($url);
			push( @save, "<A HREF=\"" . $href . "\" >" .
				$href . "</A>" );
		}
	}

	foreach my $out ( @save )
	{
		print $out, "\n";
	}
}

#
#	Main: read a mailbox (or plain text) from the files named on
#	the command line, or from standard input, split it into
#	messages at the "From " separator lines and hand each message
#	to analyze().  Any text before the first "From " line is
#	treated as a message of its own.
#
my @mail	= ();

while ( my $line = <> )
{
	# Strip the line terminator only.  chop would also eat the last
	# real character of a final line that has no newline; a CR left
	# over from CRLF line endings is removed as well.
	$line =~ s/\r?\n\z//;

	# A "From " line starts the next message: flush what has been
	# collected so far (nothing yet if this is the very first line).
	if ( $line =~ /^From / && @mail )
	{
		analyze( @mail );
		@mail = ();
	}

	push( @mail, $line );
}

# The last message has no following "From " line to flush it.
analyze( @mail ) if @mail;