GoogleScanner
时间:2010-09-20
来源:互联网
几年前写的旧代码,好久不用了,现在还能用。此代码既能用来做好事,也能用来做坏事。本来是用来给老婆查论文资料的,结果她用不惯命令行的东西,就算了。做坏事就不用我说了吧。
配置文件如下格式
#########################
#=任意区域
#countryTD=乍得
#countryGI=直布罗陀
#countryCL=智利
#countryCF=中非共和国
#countryCN=中国
#countryMO=中国澳门特别行政区
#countryHK=中国香港特别行政区
country=all
inurl:/index.htm
#########################
复制代码
配置文件如下格式
#########################
#=任意区域
#countryTD=乍得
#countryGI=直布罗陀
#countryCL=智利
#countryCF=中非共和国
#countryCN=中国
#countryMO=中国澳门特别行政区
#countryHK=中国香港特别行政区
country=all
inurl:/index.htm
#########################
- #!/usr/bin/perl -w
-
- #By xti9er
-
- require LWP::UserAgent;
- use LWP::Simple;
- use Color::Output;
- Color::Output::Init;
-
- $|=1;
-
#----------------------------
# Read the search strings and the optional country filter from the
# configuration file (first command-line argument, default "inurl.ini").
#
# File format, one entry per line:
#   #...            comment, ignored
#   country=XX      sets the country filter to "countryXX"
#   country=        leaves the country filter empty
#   anything else   a search string, appended to @inurl
#
# @inurl is deliberately left as a package variable because the query
# loop further down iterates over it.
#----------------------------
my $inrulfile=shift||"inurl.ini";
my $countryset="";
open(my $cfg_fh, '<', $inrulfile) or die "Cannot open $inrulfile: $!";
while (my $line = <$cfg_fh>)
{
    # Strip the line ending, tolerating CRLF files written on Windows.
    $line =~ s/\r?\n\z//;

    next if $line =~ /^#/;
    # A blank line is not a search string; skipping it avoids issuing
    # a query with an empty q= parameter.
    next if $line =~ /^\s*$/;

    if ($line =~ /^country=(\w+)/i)
    {
        $countryset = "country$1";
    }
    elsif ($line =~ /^country=$/i)
    {
        # Explicitly empty country: keep $countryset as "".
    }
    else
    {
        push(@inurl, $line);
    }
}
close $cfg_fh;
-
#----------------------------
# Main loop: for every search string read from the configuration file,
# fetch the first Google result page, extract the total number of hits
# from the "resultStats" element, and then call spider() once per
# 100-result page to collect and log the result URLs.
#
# Package variables set here and read elsewhere in the file:
#   $sleeptime  delay in seconds between page requests (read by spider)
#   $stime      start timestamp; names the log file and is used for the
#               elapsed-time report at the end of the script
#----------------------------
for my $inurl (@inurl)
{
    # Spaces in the search string become "+" for use in the query URL.
    $inurl =~ s/\s/\+/g;

    $sleeptime = 5;

    sleep(2);

    my $p75 = "-" x 80;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(20);
    $ua->env_proxy;
    $ua->agent("Mozilla/5.0");

    cprin("\t\t\t --=G o o g l e \t S c a n n e r=-- \n",5);
    cprin("\t\t\t By xti9er \n",13);
    cprin("$p75\n",7);
    $stime = time;
    my $lstime = localtime();
    cprin("\t\t\tStart at $lstime\n",13);

    my $searchweb = "http://www.google.com.hk";
    my $starturl  = "$searchweb/search?num=100&complete=1&hl=zh-CN&cr=$countryset&newwindow=1&q=$inurl&start=0&sa=N/";

    print "[Google] page:$starturl\n";

    # get() always returns a response object, so failure is detected
    # through is_success rather than through the truth of the return value.
    my $response = $ua->get($starturl);
    unless ($response->is_success)
    {
        cprin($response->status_line,5);
        next;
    }

    # Lexical list: spider() assigns to the package variable @getlog,
    # which must not be the array being iterated here.
    my @chunks = split(/href=/, $response->content);

    for my $chunk (@chunks)
    {
        # Capture only the text up to the first "<" so that digits from
        # any later markup in the same chunk are not mixed into the count.
        next unless $chunk =~ /id=resultStats>([^<]*)</;

        (my $total = $1) =~ s/\D//g;
        my $ttpageno = int(($total || 0) / 100);

        cprin("\t\t\t$ttpageno Google Pages To Read!\n$p75\n",5);
        sleep(2);

        # $startpage is a result offset (0, 100, 200, ...) while
        # $ttpageno is a page count, so the bound is scaled by 100.
        for (my $startpage = 0; $startpage <= $ttpageno * 100; $startpage += 100)
        {
            sleep($sleeptime);
            cprin("Now Read The ".$startpage."th Page!\n--------------------------------------\n",13);
            spider($startpage,$inurl);
        }

        # The result count has been handled; do not rescan the rest.
        last;
    }
}
-
#----------------------------
# spider($pageno, $inurl)
#
# Fetch one Google result page and log the result URLs found on it.
#
#   $pageno  result offset passed as the start= query parameter
#   $inurl   search string, already prepared for use in a URL
#
# Reads the file-level variables $sleeptime, $countryset and $stime.
# Every distinct URL found is printed and appended to "$stime.log".
#
# Returns early (without a value) when the page cannot be fetched or
# the log file cannot be opened.  When the page contains no link to the
# following page, control jumps to the SPIDEREND label at the end of
# the script, which ends the whole scan.
#----------------------------
sub spider
{
    my $pageno = shift;
    my $inurl  = shift;

    sleep($sleeptime);

    my $searchweb  = "http://www.google.com.hk";
    my $searchurl  = "$searchweb/search?num=100&complete=1&hl=zh-CN&cr=$countryset&newwindow=1&q=$inurl&start=$pageno&sa=N/";
    my $nextpageno = $pageno + 100;
    cprin("Now URL: $searchurl\n",7);

    my $ua = LWP::UserAgent->new;
    $ua->timeout(20);
    $ua->env_proxy;
    $ua->agent("Mozilla/5.0");

    my $response = $ua->get($searchurl);
    unless ($response->is_success)
    {
        print "Get page count faild!\n";
        cprin($response->status_line,5);
        # "return" instead of "next": leaving a sub through the caller's
        # loop control is deprecated and hides the control flow.
        return;
    }

    # Lexical on purpose: the caller may be iterating a package-level
    # @getlog, which must not be overwritten from here.
    my @chunks = split(/href=/, $response->content);

    my @sites;
    my $has_next = 0;
    for my $chunk (@chunks)
    {
        if ($chunk =~ m{^"http://(.*?)"\starget=_blank\sclass=l})
        {
            push(@sites, "http://$1");
        }

        # Link to the following result page.  Accepts "&sa=N",
        # "&amp;sa=N" and "&;sa=N" as the separator before sa=N.
        if ($chunk =~ m{/search\?.*?q=.*?start=${nextpageno}&(?:amp)?;?sa=N})
        {
            $has_next = 1;
        }
    }

    # Drop duplicate URLs while keeping first-seen order.
    my %seen;
    @sites = grep { !$seen{$_}++ } @sites;

    my $wlog;
    unless (open($wlog, '>>', "$stime.log"))
    {
        cprin($!,5);
        return;
    }
    for my $nowsite (@sites)
    {
        print "URL:$nowsite\n";
        print {$wlog} $nowsite, "\n";
    }
    close $wlog;

    unless ($has_next)
    {
        cprin("\t\t\tThe End\n",13);
        # No further result page: leave the scan and run the
        # elapsed-time report at the end of the script.
        goto SPIDEREND;
    }

    return;
}
-
# End of the scan.  spider() jumps to this label when a result page has
# no link to a following page; normal completion of the query loop also
# falls through to here.  Prints the time elapsed since $stime was set.
SPIDEREND:
$etime = time;
print stime($ttime = $etime - $stime);
-
#----------------------------
# cprin($str, $i)
#
# Print $str through cprint() (imported from Color::Output), prefixed
# with the control sequence "\x03" followed by $i and terminated with a
# newline and "\x030".  $i is presumably the Color::Output colour
# number and "\x030" the reset sequence -- confirm against the module
# documentation.
#
# Returns whatever cprint() returns.
#----------------------------
sub cprin
{
    # Lexicals, so that callers' package variables named $str or $i
    # are not overwritten on every call.
    my ($str, $i) = @_;
    return cprint("\x03" . $i . "$str\n\x030");
}
-
#----------------------------
# stime($seconds)
#
# Format a duration given in seconds as the string
# "H hours M mins S secs".
#----------------------------
sub stime
{
    my ($elapsed) = @_;

    my $hours     = int($elapsed / 3600);
    my $remainder = $elapsed - $hours * 3600;   # seconds left after whole hours
    my $mins      = int($remainder / 60);
    my $secs      = $remainder - $mins * 60;

    return join(' ', $hours, 'hours', $mins, 'mins', $secs, 'secs');
}
作者: xti9er 发布时间: 2010-09-20
厉害,学习一下
不过没做过坏事,还真不知道怎么用来做坏事,哈哈
不过没做过坏事,还真不知道怎么用来做坏事,哈哈
作者: liht1981 发布时间: 2010-09-20
学习一下~
作者: x9x9 发布时间: 2010-09-20
相关阅读 更多
热门阅读
-
office 2019专业增强版最新2021版激活秘钥/序列号/激活码推荐 附激活工具
阅读:74
-
如何安装mysql8.0
阅读:31
-
Word快速设置标题样式步骤详解
阅读:28
-
20+道必知必会的Vue面试题(附答案解析)
阅读:37
-
HTML如何制作表单
阅读:22
-
百词斩可以改天数吗?当然可以,4个步骤轻松修改天数!
阅读:31
-
ET文件格式和XLS格式文件之间如何转化?
阅读:24
-
react和vue的区别及优缺点是什么
阅读:121
-
支付宝人脸识别如何关闭?
阅读:21
-
腾讯微云怎么修改照片或视频备份路径?
阅读:28