[perl]代码库
#-----------------------------
use HTML::LinkExtor;
$parser = HTML::LinkExtor->new ( undef, $base_url );
$parser->parse_file ( $filename );
@links = $parser->links;
foreach $linkarray ( @links )
{
my @element = @$linkarray;
my $elt_type = shift @element;
# element type
# possibly test whether this is an element we're interested in
while (@element) {
# extract the next attribute and its value
my ($attr_name, $attr_value) = splice(@element, 0, 2);
# ... do something with them ...
}
}
#-----------------------------
<A HREF="http://www.perl.com/">Home page</A>
<IMG SRC="images/big.gif" LOWSRC="images/big-lowres.gif">
#-----------------------------
[
[ a, href => "http://www.perl.com/" ],
[ img, src => "images/big.gif",
lowsrc => "images/big-lowres.gif" ]
]
#-----------------------------
if ($elt_type eq 'a' && $attr_name eq 'href') {
print "ANCHOR: $attr_value\n"
if $attr_value->scheme =~ /http|ftp/;
}
if ($elt_type eq 'img' && $attr_name eq 'src') {
print "IMAGE: $attr_value\n";
}
#-----------------------------
# download the following standalone program
#!/usr/bin/perl -w
# xurl - extract unique, sorted list of links from URL
use HTML::LinkExtor;
use LWP::Simple;
$base_url = shift;
$parser = HTML::LinkExtor->new(undef, $base_url);
$parser->parse(get($base_url))->eof;
@links = $parser->links;
foreach $linkarray (@links) {
my @element = @$linkarray;
my $elt_type = shift @element;
while (@element) {
my ($attr_name , $attr_value) = splice(@element, 0, 2);
$seen{$attr_value}++;
}
}
for (sort keys %seen) { print $_, "\n" }
#-----------------------------
#% xurl http://www.perl.com/CPAN
#ftp://ftp@ftp.perl.com/CPAN/CPAN.html
#
#http://language.perl.com/misc/CPAN.cgi
#
#http://language.perl.com/misc/cpan_module
#
#http://language.perl.com/misc/getcpan
#
#http://www.perl.com/index.html
#
#http://www.perl.com/gifs/lcb.xbm
#-----------------------------
<URL:http://www.perl.com>
#-----------------------------
@URLs = ($message =~ /<URL:(.*?)>/g);
#-----------------------------
by: 发表于:2017-09-18 17:49:34 顶(0) | 踩(0) 回复
??
回复评论