#!/usr/bin/perl
# Gallery downloader.
#
# Fetches the index page at $url, takes the first gallery link found there,
# and then walks from gallery to gallery: get_girl_pics() downloads one
# gallery and returns the URL of the next one, or a false value when there
# is nothing left to follow.
use strict;
use warnings;
use LWP;
use LWP::Simple;

# On Ctrl-C, get_out() removes the file that is being written at that moment.
$SIG{INT} = \&get_out;

my $url        = 'http://www.airenti.org/Html/Type/1_1.html';   # index page
my $url_girls  = 'http://www.airenti.org/Html/';                # base for "../" links
my $local_path = '/cygdrive/d/Downloads/art/';                  # download root
my $crt_file   = '';       # file currently being downloaded ('' when idle)
my $tmp_dir    = '.art';   # suffix of a gallery directory that is still in progress

# NOTE(review): this header list is not passed to any request in the visible
# code, and its Host value names a different site than $url. It is kept only
# because other parts of the file may refer to it; verify before using it.
my @HEAD = (
    'Host'            => 'processbase.neusoft.com',
    'User-Agent'      => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:10.0.1) Gecko/20100101 Firefox/10.0.1',
    'Accept'          => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language' => 'en-us,en;q=0.5',
    'Accept-Encoding' => 'gzip, deflate',
    'Connection'      => 'keep-alive',
);

my $browser  = LWP::UserAgent->new();
my $response = $browser->get($url);

# Without this check an error page would be scanned as if it were the index.
$response->is_success
    or die "Cannot fetch $url: " . $response->status_line . "\n";
my $index_page = $response->content;

# First anchor that has an alt text which is repeated later on the same line;
# its href is the entry point of the gallery chain.
if ($index_page =~ m{<a\s+href="(.*?)".*?alt="(.*?)".*?\2}) {
    my $girl_url = $1;

    # Links on the index are relative ("../..."); make the first one absolute.
    $girl_url =~ s{\.\./}{$url_girls};

    # get_girl_pics() returns the next gallery URL, or 0 to stop.
    while ($girl_url) {
        $girl_url = get_girl_pics($girl_url);
    }
}
# get_girl_pics($url)
#
# Downloads one gallery and reports where the next gallery starts.
#
#   $url  - address of the first page of a gallery; the file name is expected
#           to look like "<number>_<page>.html".
#
# The page <title> is used as the gallery name. Pictures are collected in
# "$local_path$title$tmp_dir" and that directory is renamed to
# "$local_path$title" once every page of the gallery could be fetched. A
# gallery whose final directory already exists is not downloaded again.
#
# Returns the URL of the next gallery (built from the "下一组:" link on the
# page), or 0 when the page cannot be fetched, says there is no next gallery,
# or has no such link.
#
# NOTE(review): the page is matched as raw bytes ($res->content) against the
# literals below, so this only works when the source file and the site use
# the same encoding - confirm.
sub get_girl_pics {
    my $url = shift;

    my $girl_res = $browser->get($url);
    if (!$girl_res->is_success) {
        # An error page must not be parsed: its <title> would otherwise
        # become a gallery directory.
        warn "Cannot fetch $url: " . $girl_res->status_line . "\n";
        return 0;
    }
    my $page_content = $girl_res->content;

    # Gallery number: the digits between the last "/" and the "_" of the URL.
    my $girl_no = $url;
    $girl_no =~ s{.*/(\d+)_.*}{$1};

    # Directory part of the URL; sibling pages and the next gallery are
    # resolved relative to it.
    my $pageindex = $url;
    $pageindex =~ s{/[^/]+$}{};

    if ($page_content =~ m{<title>(.*?)</title>}) {
        my $title = $1;
        print $title . "\n";

        my $final_path = $local_path . $title;
        if ($title =~ m{[/\\]}) {
            # The title comes from the remote page; a path separator in it
            # would make us write outside of $local_path.
            warn "Skipping gallery with unsafe title: $title\n";
        }
        elsif (!-d $final_path) {
            my $tmp_path = $final_path . $tmp_dir;
            if ((-d $tmp_path) || mkdir($tmp_path)) {
                my $complete = 1;

                # Pictures of the first page.
                get_girl_pic($page_content, $title);

                # Remaining pages: links "<number>_<n>.html" whose link text
                # starts with the same <n>. Pagination may be printed more
                # than once, so every page is fetched a single time.
                my %seen;
                while ($page_content =~ m{href="(\Q$girl_no\E_(\d+)\.html)">\2}g) {
                    my $page_name = $1;
                    next if $seen{$page_name}++;

                    my $page_url = $pageindex . '/' . $page_name;
                    my $page_res = $browser->get($page_url);
                    if ($page_res->is_success) {
                        get_girl_pic($page_res->content, $title);
                    }
                    else {
                        warn "Cannot fetch $page_url: " . $page_res->status_line . "\n";
                        $complete = 0;
                    }
                }

                # Keep the in-progress name when a page was missed, so that a
                # later run picks the gallery up again.
                if ($complete) {
                    rename($tmp_path, $final_path)
                        or warn "Cannot rename $tmp_path to $final_path: $!\n";
                }
            }
            else {
                warn "Cannot create $tmp_path: $!\n";
            }
        }
    }

    my $no_next = '下一组:没有了';
    my $next    = '下一组:';

    # The page states explicitly that there is no next gallery.
    return 0 if $page_content =~ /$no_next/;

    # NOTE(review): the href is assumed to be relative to the directory of
    # the current page - confirm against the site.
    if ($page_content =~ m{<a href="([^"]*)">$next}) {
        return $pageindex . '/' . $1;
    }
    return 0;
}
# get_girl_pic($page, $title)
#
# Downloads every picture referenced on one gallery page.
#
#   $page   - HTML source of the page.
#   $title  - gallery title; only <img> tags whose alt text equals it are
#             taken, and it also names the target directory
#             "$local_path$title$tmp_dir".
#
# Files that already exist locally are skipped. While a download is running
# its target path is published in $crt_file so that the SIGINT handler can
# remove the partial file. Returns nothing.
sub get_girl_pic {
    my $page  = shift;
    my $title = shift;

    my $target_dir = $local_path . $title . $tmp_dir . '/';

    # \Q..\E: the title is page text and may contain regex metacharacters.
    # [^"]* keeps the src capture inside one attribute instead of letting it
    # run across a preceding <img> on the same line.
    while ($page =~ m{<img\s+src="([^"]*)"\s+alt="\Q$title\E"}g) {
        my $pic_file = $1;
        $pic_file =~ s/^\s+//;
        $pic_file =~ s/\s+$//;

        # Local name = last path component of the picture URL.
        (my $base_name = $pic_file) =~ s{.*/}{};
        next if $base_name eq '';    # URL ends in "/": nothing to store

        my $local_file = $target_dir . $base_name;
        next if -e $local_file;      # already downloaded

        print "\t" . $pic_file . "\n";
        print "\t => " . $local_file . "\n";

        $crt_file = $local_file;
        my $status = LWP::Simple::getstore($pic_file, $local_file);
        if (!LWP::Simple::is_success($status)) {
            warn "\tdownload failed (HTTP $status): $pic_file\n";
            # Do not leave a file behind that a later run would take for a
            # finished download.
            unlink $local_file if -e $local_file;
        }
        $crt_file = '';
    }
    return;
}
# get_out()
#
# SIGINT handler: removes the file named in $crt_file (the download that was
# in progress when the signal arrived, if any) and ends the program.
sub get_out {
    if ($crt_file && -e $crt_file) {
        unlink($crt_file)
            or warn "Cannot remove partial file $crt_file: $!\n";
    }
    exit;
}
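# --- Residue of the web page this script was copied from (not code) ---
# by:  posted: 2017-09-11 11:50:52  upvotes(0) | downvotes(0)  reply
# ??
# reply to comment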