[perl]代码库
#!/usr/bin/perl
use LWP;
use LWP::Simple;
# Enable strictures for everything below; every variable in this script is
# already declared with "my", so this only catches future typos.
# "use utf8" is deliberately NOT enabled: the Chinese string literals in this
# script are matched against the raw bytes returned by the server.
use strict;
use warnings;

# On Ctrl-C, let get_out delete a half-written download before exiting.
$SIG{INT} = \&get_out;

# Listing page the crawl starts from.
my $url = 'http://www.airenti.org/Html/Type/1_1.html';
# Replaces the first "../" of the gallery link found on the listing page.
my $url_girls = 'http://www.airenti.org/Html/';
# Local root directory; gallery directories are created directly below it.
my $local_path = '/cygdrive/d/Downloads/art/';
# Path of the file currently being downloaded ('' when idle); read by get_out.
my $crt_file = '';
# Suffix appended to a gallery directory name while it is still downloading.
my $tmp_dir = '.art';

# NOTE(review): this header list is not passed to any request in the code
# visible here, and its Host value names an unrelated server. Confirm whether
# it can be dropped; if it is ever sent, fix Host first and be aware that
# requesting gzip requires decoded_content instead of content.
my @HEAD = (
    'Host' => 'processbase.neusoft.com',
    'User-Agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:10.0.1) Gecko/20100101 Firefox/10.0.1',
    'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language' => 'en-us,en;q=0.5',
    'Accept-Encoding' => 'gzip, deflate',
    'Connection' => 'keep-alive',
);

my $browser = LWP::UserAgent->new();

# Fetch the listing page; without it there is nothing to crawl, so fail loudly
# instead of silently parsing an error page.
my $response = $browser->get($url);
$response->is_success
    or die "Cannot fetch $url: " . $response->status_line . "\n";
my $index_page = $response->content;

# The first anchor whose alt text is repeated later on the same line is taken
# as the link to the first gallery.
if ($index_page =~ m{<a\s+href="(.*?)".*?alt="(.*?)".*?\2}) {
    my $girl_url = $1;
    $girl_url =~ s/\.\.\//$url_girls/;

    # Follow the "next gallery" chain until get_girl_pics reports the end.
    # %seen stops the crawl if the chain ever loops back onto a visited page.
    my %seen;
    while ($girl_url && !$seen{$girl_url}++) {
        $girl_url = get_girl_pics($girl_url);
    }
}
else {
    warn "No gallery link found on $url\n";
}
# Download all pictures of one gallery and find the gallery that follows it.
#
# Parameters:
#   $url - absolute URL of the gallery's first page. The last path segment is
#          expected to start with "<digits>_"; those digits identify the
#          gallery's additional pages.
#
# Returns:
#   The URL of the next gallery (directory of $url + '/' + the href found in
#   front of the "next" marker), or 0 when the page cannot be fetched, when it
#   says there is no next gallery, or when no next link is found.
#
# Side effects:
#   Prints the page title. Unless the final directory already exists, pictures
#   are stored in "<local_path><title><tmp_dir>", which is renamed to
#   "<local_path><title>" once every page of the gallery has been fetched.
sub get_girl_pics {
    my $url = shift;

    my $girl_res = $browser->get($url);
    if (!$girl_res->is_success) {
        warn "Cannot fetch $url: " . $girl_res->status_line . "\n";
        return 0;
    }
    my $page_content = $girl_res->content;

    # Gallery number: the digits in front of "_" in the last path segment.
    my $girl_no = $url;
    $girl_no =~ s/.*\/(\d+)_.*/$1/;

    # Directory part of the URL; hrefs on the page are appended to it.
    my $pageindex = $url;
    $pageindex =~ s{/[^/]+$}{};

    if ($page_content =~ /<(title)>(.*?)<\/\1>/) {
        my $title = $2;
        print $title."\n";

        my $final_path = $local_path.$title;
        # An existing final directory marks the gallery as already complete.
        if (!-d $final_path) {
            my $tmp_path = $final_path.$tmp_dir;
            if (-d $tmp_path or mkdir($tmp_path)) {
                my $complete = 1;
                get_girl_pic($page_content, $title);

                # Further pages are linked as "<no>_<n>.html" with <n> as the
                # link text. \Q...\E keeps $girl_no literal even when the
                # substitution above did not match and it still holds a URL.
                while ($page_content =~ m{.*?href="(\Q$girl_no\E_(\d+)\.html)">\2}g) {
                    my $sub_url = $pageindex.'/'.$1;
                    my $sub_res = $browser->get($sub_url);
                    if ($sub_res->is_success) {
                        get_girl_pic($sub_res->content, $title);
                    }
                    else {
                        warn "Cannot fetch $sub_url: " . $sub_res->status_line . "\n";
                        $complete = 0;
                    }
                }

                # Only mark the gallery as done when no page was skipped, so a
                # later run retries it (existing pictures are not re-fetched).
                if ($complete) {
                    rename($tmp_path, $final_path)
                        or warn "Cannot rename $tmp_path to $final_path: $!\n";
                }
            }
            else {
                warn "Cannot create $tmp_path: $!\n";
            }
        }
    }

    # Markers are compared against the raw page bytes, so this source file
    # presumably has to be saved in the same encoding the site serves.
    my $no_next = '下一组:没有了';
    my $next = '下一组:';
    if ($page_content =~ /$no_next/m) {
        return 0;
    }
    elsif ($page_content =~ /.*<a href="(.*?)">$next/m) {
        return $pageindex.'/'.$1;
    }
    else {
        return 0;
    }
}
# Download every picture on one gallery page whose alt text equals the title.
#
# Parameters:
#   $page  - HTML source of the gallery page
#   $title - gallery title; used both to recognise the gallery's <img> tags
#            and to build the local directory name
#
# Pictures are saved as "<local_path><title><tmp_dir>/<basename of the URL>".
# Files that already exist are skipped. While a download is running, its path
# is kept in $crt_file so the interrupt handler can delete the partial file.
sub get_girl_pic {
    my $page = shift;
    my $title = shift;

    # \Q...\E: the title is page text, not a pattern; without quoting, titles
    # containing characters such as ( [ + ? would mis-match or abort the run.
    while ($page =~ m{<img\s+src="(.*?)"\s+alt="\Q$title\E"}g) {
        my $pic_file = $1;
        $pic_file =~ s/^\s+//;
        $pic_file =~ s/\s+$//;

        # Keep only the part after the last "/" as the local file name.
        my $local_file = $pic_file;
        $local_file =~ s/.*\///;
        $local_file = $local_path.$title.$tmp_dir.'/'.$local_file;

        next if -e $local_file;

        print "\t".$pic_file."\n";
        print "\t => ".$local_file."\n";

        $crt_file = $local_file;
        my $status = LWP::Simple::getstore($pic_file, $local_file);
        if (!LWP::Simple::is_success($status)) {
            warn "\tdownload failed ($status): $pic_file\n";
            # Remove whatever was written so the next run retries this file.
            unlink($local_file) if -e $local_file;
        }
        # Cleared only after cleanup, so an interrupt cannot leave a partial
        # file behind.
        $crt_file = '';
    }
    return;
}
# Interrupt handler: delete the file recorded in $crt_file, if any, so that
# no partially downloaded picture is left behind, then terminate the script.
sub get_out {
    unlink($crt_file) if $crt_file;
    exit;
}
by: 发表于:2017-09-11 11:50:52 顶(0) | 踩(0) 回复
??
回复评论