#----------------------------- |
use HTML::LinkExtor; |
$parser = HTML::LinkExtor->new ( undef , $base_url ); |
$parser ->parse_file ( $filename ); |
@links = $parser ->links; |
foreach $linkarray ( @links ) |
{ |
my @element = @ $linkarray ; |
my $elt_type = shift @element ; |
# element type |
# possibly test whether this is an element we're interested in |
while ( @element ) { |
# extract the next attribute and its value |
my ( $attr_name , $attr_value ) = splice ( @element , 0, 2); |
# ... do something with them ... |
} |
} |
#----------------------------- |
<A HREF= "http://www.perl.com/" >Home page</A> |
<IMG SRC= "images/big.gif" LOWSRC= "images/big-lowres.gif" > |
#----------------------------- |
[ |
[ a, href => "http://www.perl.com/" ], |
[ img, src => "images/big.gif" , |
lowsrc => "images/big-lowres.gif" ] |
] |
#----------------------------- |
if ( $elt_type eq 'a' && $attr_name eq 'href' ) { |
print "ANCHOR: $attr_value\n" |
if $attr_value ->scheme =~ /http|ftp/; |
} |
if ( $elt_type eq 'img' && $attr_name eq 'src' ) { |
print "IMAGE: $attr_value\n" ; |
} |
#----------------------------- |
# download the following standalone program |
#!/usr/bin/perl -w |
# xurl - extract unique, sorted list of links from URL |
use HTML::LinkExtor; |
use LWP::Simple; |
$base_url = shift ; |
$parser = HTML::LinkExtor->new( undef , $base_url ); |
$parser ->parse(get( $base_url ))-> eof ; |
@links = $parser ->links; |
foreach $linkarray ( @links ) { |
my @element = @ $linkarray ; |
my $elt_type = shift @element ; |
while ( @element ) { |
my ( $attr_name , $attr_value ) = splice ( @element , 0, 2); |
$seen { $attr_value }++; |
} |
} |
for ( sort keys %seen ) { print $_ , "\n" } |
#----------------------------- |
#% xurl http://www.perl.com/CPAN |
#ftp://ftp@ftp.perl.com/CPAN/CPAN.html |
# |
#http://language.perl.com/misc/CPAN.cgi |
# |
#http://language.perl.com/misc/cpan_module |
# |
#http://language.perl.com/misc/getcpan |
# |
#http://www.perl.com/index.html |
# |
#http://www.perl.com/gifs/lcb.xbm |
#----------------------------- |
<URL:http://www.perl.com> |
#----------------------------- |
@URLs = ( $message =~ /<URL:(.*?)>/g); |
#----------------------------- |
by: 发表于:2017-09-18 17:49:34 顶(0) | 踩(0) 回复
??
回复评论