网页快照类
时间:2007-07-09
来源:互联网
CODE:
<?PHP
//====================================================
// FileName: snap.class.php
// Summary: 网页快照类
// Author: millken(迷路林肯)
// LastModified: 2007-06-29
// copyright (c)2007 [email protected]
//====================================================
/**
 * Web page snapshot: downloads a URL, keeps a copy in a file next to this
 * class file and serves that copy while it is younger than the TTL.
 */
class snap{
// Directory the cache files are written to (set by the constructor to this file's directory, with a trailing slash).
var $dir;
// HTML-formatted trace of what the object did; every method appends to it.
var $log;
// Body of the page most recently loaded by file_get_content().
var $contents;
// Full path of the cache file for the current URL ($dir followed by $name).
var $filename;
// "scheme://host/" prefix of the URL passed to fetch().
var $host;
// md5 hash of the URL, used as the cache file name.
var $name;
// Modification time of the cache file as a Unix timestamp, or 0 when the file does not exist.
var $data_ts;
// Maximum age of the cache file, in seconds, before the URL is fetched again.
var $ttl;
// URL passed to fetch().
var $url;
// Age of the cache file in seconds (time() minus $data_ts).
var $ts;
/**
 * Constructor: starts the trace log and points the cache directory at the
 * directory that contains this class file.
 *
 * PHP 8 no longer treats a method named after its class as a constructor,
 * so the initialisation lives in __construct().
 */
function __construct(){
    $this->log = "New snap() object instantiated.<br />\n";
    $this->dir = dirname(__FILE__)."/";
}
/**
 * Legacy PHP 4 constructor name, kept so that code calling snap()
 * explicitly keeps working. It simply re-runs the initialisation.
 */
function snap(){
    $this->__construct();
}
/**
 * Load a snapshot of $url into $this->contents, using the cached copy when
 * it is no older than $ttl seconds.
 *
 * @param string $url Absolute http, https or ftp URL of the page.
 * @param int    $ttl Maximum age of the cached copy, in seconds.
 * @return bool false when the URL is missing or not an absolute
 *              http/https/ftp URL, otherwise the result of file_get_content().
 */
function fetch($url="",$ttl=10){
    // Anything that is not a string (e.g. ?url[]=x) is treated as "no URL".
    if (!is_string($url)) {
        $url = "";
    }
    $this->log .= "--------------------------------<br />fetch() called<br />\n";
    // The log is HTML, so the caller-supplied URL is escaped before it is added.
    $this->log .= "url: ".htmlspecialchars($url,ENT_QUOTES)."<br />\n";
    // Check for a missing URL before parsing it, so no undefined-index
    // notices are raised for the scheme/host lookups below.
    if (!$url) {
        $this->log .= "OOPS: You need to pass a URL!<br />";
        return false;
    }
    $hosts = parse_url($url);
    $scheme = (is_array($hosts) && isset($hosts['scheme'])) ? strtolower($hosts['scheme']) : "";
    // The URL is later opened with a stream function; without this check a
    // caller-supplied value such as "/etc/passwd" would read a local file.
    if (!in_array($scheme,array("http","https","ftp"),true) || empty($hosts['host'])) {
        $this->log .= "OOPS: Only absolute http, https or ftp URLs are supported!<br />";
        return false;
    }
    $this->host = $hosts['scheme'].'://'.$hosts['host'].'/';
    $this->ttl = $ttl;
    $this->url = $url;
    // The cache file is named after the md5 hash of the URL.
    $this->name = md5($this->url);
    $this->filename = $this->dir.$this->name;
    $this->log .= "Filename: ".$this->filename."<br />";
    $this->getFile_ts();
    return $this->file_get_content();
}
/**
 * Fill $this->contents from the cache file when it is still fresh,
 * otherwise from the remote URL (and then refresh the cache file).
 *
 * Expects $this->data_ts, $this->ttl, $this->filename and $this->url to
 * have been set beforehand, as fetch() does.
 *
 * @return bool true when content was loaded; false when the remote URL
 *              could not be read (the existing cache file is left alone).
 */
function file_get_content(){
    $this->ts = time() - $this->data_ts;
    // A cache timestamp of 0 means "no cache file".
    if ($this->data_ts != 0 && $this->ts <= $this->ttl) {
        $this->log .= "cache hasn't expired<br />";
        $cached = @file_get_contents($this->filename);
        $this->contents = ($cached === false) ? "" : $cached;
        return true;
    }
    $this->log .= "cache has expired<br />";
    $remote = @file_get_contents($this->url);
    if ($remote === false) {
        // Do not call saveToCache() here: it would replace the previous
        // snapshot with an empty file.
        $this->log .= "Could not read ".$this->url."<br />";
        $this->contents = "";
        return false;
    }
    $this->contents = $remote;
    $this->saveToCache();
    return true;
}
<?PHP
//====================================================
// FileName: snap.class.php
// Summary: 网页快照类
// Author: millken(迷路林肯)
// LastModified: 2007-06-29
// copyright (c)2007 [email protected]
//====================================================
/**
 * Web page snapshot: downloads a URL, keeps a copy in a file next to this
 * class file and serves that copy while it is younger than the TTL.
 */
class snap{
// Directory the cache files are written to (set by the constructor to this file's directory, with a trailing slash).
var $dir;
// HTML-formatted trace of what the object did; every method appends to it.
var $log;
// Body of the page most recently loaded by file_get_content().
var $contents;
// Full path of the cache file for the current URL ($dir followed by $name).
var $filename;
// "scheme://host/" prefix of the URL passed to fetch().
var $host;
// md5 hash of the URL, used as the cache file name.
var $name;
// Modification time of the cache file as a Unix timestamp, or 0 when the file does not exist.
var $data_ts;
// Maximum age of the cache file, in seconds, before the URL is fetched again.
var $ttl;
// URL passed to fetch().
var $url;
// Age of the cache file in seconds (time() minus $data_ts).
var $ts;
/**
 * Constructor: starts the trace log and points the cache directory at the
 * directory that contains this class file.
 *
 * PHP 8 no longer treats a method named after its class as a constructor,
 * so the initialisation lives in __construct().
 */
function __construct(){
    $this->log = "New snap() object instantiated.<br />\n";
    $this->dir = dirname(__FILE__)."/";
}
/**
 * Legacy PHP 4 constructor name, kept so that code calling snap()
 * explicitly keeps working. It simply re-runs the initialisation.
 */
function snap(){
    $this->__construct();
}
/**
 * Load a snapshot of $url into $this->contents, using the cached copy when
 * it is no older than $ttl seconds.
 *
 * @param string $url Absolute http, https or ftp URL of the page.
 * @param int    $ttl Maximum age of the cached copy, in seconds.
 * @return bool false when the URL is missing or not an absolute
 *              http/https/ftp URL, otherwise the result of file_get_content().
 */
function fetch($url="",$ttl=10){
    // Anything that is not a string (e.g. ?url[]=x) is treated as "no URL".
    if (!is_string($url)) {
        $url = "";
    }
    $this->log .= "--------------------------------<br />fetch() called<br />\n";
    // The log is HTML, so the caller-supplied URL is escaped before it is added.
    $this->log .= "url: ".htmlspecialchars($url,ENT_QUOTES)."<br />\n";
    // Check for a missing URL before parsing it, so no undefined-index
    // notices are raised for the scheme/host lookups below.
    if (!$url) {
        $this->log .= "OOPS: You need to pass a URL!<br />";
        return false;
    }
    $hosts = parse_url($url);
    $scheme = (is_array($hosts) && isset($hosts['scheme'])) ? strtolower($hosts['scheme']) : "";
    // The URL is later opened with a stream function; without this check a
    // caller-supplied value such as "/etc/passwd" would read a local file.
    if (!in_array($scheme,array("http","https","ftp"),true) || empty($hosts['host'])) {
        $this->log .= "OOPS: Only absolute http, https or ftp URLs are supported!<br />";
        return false;
    }
    $this->host = $hosts['scheme'].'://'.$hosts['host'].'/';
    $this->ttl = $ttl;
    $this->url = $url;
    // The cache file is named after the md5 hash of the URL.
    $this->name = md5($this->url);
    $this->filename = $this->dir.$this->name;
    $this->log .= "Filename: ".$this->filename."<br />";
    $this->getFile_ts();
    return $this->file_get_content();
}
/**
 * Fill $this->contents from the cache file when it is still fresh,
 * otherwise from the remote URL (and then refresh the cache file).
 *
 * Expects $this->data_ts, $this->ttl, $this->filename and $this->url to
 * have been set beforehand, as fetch() does.
 *
 * @return bool true when content was loaded; false when the remote URL
 *              could not be read (the existing cache file is left alone).
 */
function file_get_content(){
    $this->ts = time() - $this->data_ts;
    // A cache timestamp of 0 means "no cache file".
    if ($this->data_ts != 0 && $this->ts <= $this->ttl) {
        $this->log .= "cache hasn't expired<br />";
        $cached = @file_get_contents($this->filename);
        $this->contents = ($cached === false) ? "" : $cached;
        return true;
    }
    $this->log .= "cache has expired<br />";
    $remote = @file_get_contents($this->url);
    if ($remote === false) {
        // Do not call saveToCache() here: it would replace the previous
        // snapshot with an empty file.
        $this->log .= "Could not read ".$this->url."<br />";
        $this->contents = "";
        return false;
    }
    $this->contents = $remote;
    $this->saveToCache();
    return true;
}
作者: samsung 发布时间: 2007-07-09
/**
 * Rewrite the links in $this->contents to absolute URLs, strip <script>
 * elements and write the result to the cache file.
 *
 * $this->contents keeps the rewritten markup, so the caller outputs the
 * same text that was cached.
 *
 * @return bool true when the cache file was written, false when it could
 *              not be opened or written.
 */
function saveToCache(){
    $this->log .= "saveToCache() called<br />";
    //create file pointer
    if (!$fp=@fopen($this->filename,"w")) {
        $this->log .= "Could not open ".$this->filename."<br />";
        return false;
    }
    // Resolve links against the page's own URL so that relative links are
    // taken from the page's directory, not from the site root. Fall back to
    // the host prefix when no URL has been recorded.
    $base = $this->url ? $this->url : $this->host;
    $this->contents = $this->formaturl($this->contents,$base);
    // preg_replace() returns null on a PCRE error (e.g. backtrack limit);
    // keep the unstripped markup in that case instead of caching nothing.
    $stripped = preg_replace("'<script[^>]*?>.*?</script>'si","",$this->contents);
    if ($stripped !== null) {
        $this->contents = $stripped;
    }
    //write to file
    // fwrite() returns 0 for an empty page, which is not an error, so only
    // a strict false counts as a failure.
    if (@fwrite($fp,$this->contents) === false) {
        $this->log .= "Could not write to ".$this->filename."<br />";
        fclose($fp);
        return false;
    }
    //close file pointer
    fclose($fp);
    return true;
}
/**
 * Record the modification time of the cache file in $this->data_ts.
 *
 * @return bool true when the cache file exists; false when it does not,
 *              in which case $this->data_ts is set to 0.
 */
function getFile_ts(){
    $this->log .= "getFile_ts() called<br />";
    $cacheFile = $this->filename;
    if (file_exists($cacheFile)) {
        $this->data_ts = filemtime($cacheFile);
        return true;
    }
    // No cache file yet: 0 marks "nothing cached".
    $this->data_ts = 0;
    $this->log .= $cacheFile." does not exist<br />";
    return false;
}
/**
 * Make the links of every <img src>, <link href> and <a href> tag in an
 * HTML document absolute.
 *
 * @param string $l1 HTML document.
 * @param string $l2 Base URL that relative links are resolved against.
 * @return string The document with the matched tags rewritten by lIIIIl().
 */
function formaturl($l1,$l2){
    // Quoted src/href values only, in either quote style for all three tags.
    $pattern = "/<(?:img[^>]+src|link[^>]+href|a[^>]+href)=(?:\"[^\"]+\"|'[^']+')[^>]*>/i";
    if (!preg_match_all($pattern,$l1,$regs)) {
        return $l1;
    }
    // Build a tag => rewritten-tag map once per distinct tag, then apply it
    // in a single pass so already rewritten text is never processed again.
    $map = array();
    foreach (array_unique($regs[0]) as $tag) {
        $rewritten = $this->lIIIIl($tag,$l2);
        if ($rewritten !== $tag) {
            $map[$tag] = $rewritten;
        }
    }
    return $map ? strtr($l1,$map) : $l1;
}
$this->log .= "saveToCache() called<br />";
//create file pointer
if (!$fp=@fopen($this->filename,"w")) {
$this->log .= "Could not open ".$this->filename."<br />";
return false;
}
$this->contents = $this->formaturl($this->contents,$this->host);
$this->contents = preg_replace("'<script[^>]*?>.*?</script>'si","",$this->contents);
//write to file
if (!@fwrite($fp,$this->contents)) {
$this->log .= "Could not write to ".$this->filename."<br />";
fclose($fp);
return false;
}
//close file pointer
fclose($fp);
return true;
}
/**
 * Record the modification time of the cache file in $this->data_ts.
 *
 * @return bool true when the cache file exists; false when it does not,
 *              in which case $this->data_ts is set to 0.
 */
function getFile_ts(){
    $this->log .= "getFile_ts() called<br />";
    $cacheFile = $this->filename;
    if (file_exists($cacheFile)) {
        $this->data_ts = filemtime($cacheFile);
        return true;
    }
    // No cache file yet: 0 marks "nothing cached".
    $this->data_ts = 0;
    $this->log .= $cacheFile." does not exist<br />";
    return false;
}
/**
 * Make the links of every <img src>, <link href> and <a href> tag in an
 * HTML document absolute.
 *
 * @param string $l1 HTML document.
 * @param string $l2 Base URL that relative links are resolved against.
 * @return string The document with the matched tags rewritten by lIIIIl().
 */
function formaturl($l1,$l2){
    // Quoted src/href values only, in either quote style for all three tags.
    $pattern = "/<(?:img[^>]+src|link[^>]+href|a[^>]+href)=(?:\"[^\"]+\"|'[^']+')[^>]*>/i";
    if (!preg_match_all($pattern,$l1,$regs)) {
        return $l1;
    }
    // Build a tag => rewritten-tag map once per distinct tag, then apply it
    // in a single pass so already rewritten text is never processed again.
    $map = array();
    foreach (array_unique($regs[0]) as $tag) {
        $rewritten = $this->lIIIIl($tag,$l2);
        if ($rewritten !== $tag) {
            $map[$tag] = $rewritten;
        }
    }
    return $map ? strtr($l1,$map) : $l1;
}
作者: samsung 发布时间: 2007-07-09
/**
 * Rewrite the href/src attribute of one HTML tag into an absolute URL.
 *
 * Only the first href or src attribute of the tag is touched; the rest of
 * the tag is returned byte-for-byte.
 *
 * @param string $l1 A single HTML tag, e.g. <a href="page.html">.
 * @param string $l2 Base URL the link is resolved against.
 * @return string The tag with its link made absolute and double-quoted, or
 *                the unchanged tag when it has no href/src, the link is
 *                empty, is a same-page "#anchor", already carries a scheme
 *                (http:, mailto:, javascript:, ...) or the base URL has no
 *                usable scheme/host.
 */
function lIIIIl($l1,$l2){
    // Capture the attribute value together with its quotes and its offset,
    // so exactly that occurrence can be replaced later. The lookbehind keeps
    // attributes such as data-src from being mistaken for src.
    $pattern = '/(?<![\w-])(?:href|src)\s*=\s*("[^"]*"|\'[^\']*\'|[^\s"\'>]+)/i';
    if (!preg_match($pattern,$l1,$regs,PREG_OFFSET_CAPTURE)) {
        return $l1;
    }
    $raw = $regs[1][0];
    $rawPos = $regs[1][1];
    // Strip only the surrounding quotes; quotes inside the value are data.
    $link = ($raw[0]=='"' || $raw[0]=="'") ? (string)substr($raw,1,-1) : $raw;
    $link = trim($link);
    if ($link==="" || $link[0]=="#" || preg_match('/^[a-z][a-z0-9+.\-]*:/i',$link)) {
        return $l1;
    }

    $base = parse_url($l2);
    if (!is_array($base)) {
        return $l1;
    }
    $host = isset($base["host"]) ? $base["host"] : "";
    $origin = (isset($base["scheme"]) ? $base["scheme"]."://" : "").$host;
    if ($origin==="") {
        return $l1;
    }
    if ($host!=="" && isset($base["port"])) {
        $origin .= ":".$base["port"];
    }
    // Directory of the base document without a trailing slash ("" for the
    // root). Cutting at the last "/" also handles bases ending in "/".
    $basePath = (isset($base["path"]) && $base["path"]!=="") ? $base["path"] : "/";
    $cut = strrpos($basePath,"/");
    $dir = ($cut===false) ? "" : (string)substr($basePath,0,$cut);

    // Resolve without the fragment and re-attach it afterwards.
    $fragment = "";
    $hash = strpos($link,"#");
    if ($hash!==false) {
        $fragment = substr($link,$hash);
        $link = substr($link,0,$hash);
    }

    if (substr($link,0,2)=="//") {
        // Protocol-relative link: borrow the scheme of the base URL.
        if (!isset($base["scheme"])) {
            return $l1;
        }
        $resolved = $base["scheme"].":".$link;
    } elseif ($link[0]=="/") {
        // Root-relative path.
        $resolved = $origin.$link;
    } elseif ($link[0]=="?") {
        // Query-only link: same document, different query string.
        $resolved = $origin.$basePath.$link;
    } else {
        // Document-relative path: consume leading "./" and "../" segments.
        while (true) {
            if (substr($link,0,3)=="../") {
                $link = (string)substr($link,3);
                $up = strrpos($dir,"/");
                $dir = ($up===false) ? "" : (string)substr($dir,0,$up);
            } elseif (substr($link,0,2)=="./") {
                $link = (string)substr($link,2);
            } else {
                break;
            }
        }
        $resolved = $origin.$dir."/".$link;
    }

    // The value is written back double-quoted, so embedded double quotes
    // must be encoded to keep the tag well-formed.
    $quoted = '"'.str_replace('"',"&quot;",$resolved.$fragment).'"';
    return substr_replace($l1,$quoted,$rawPos,strlen($raw));
}
}
?>
用法(test.php):
CODE:
<?php
// Example front end: prints the snapshot of the page named by ?url=...
require_once(dirname(__FILE__).'/snap.class.php');
// Treat a missing or non-string "url" parameter as "no URL given" instead
// of raising an undefined-index notice.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : '';
$h = new snap();
// fetch() returns false when the URL is rejected.
if ($h->fetch($url) === false) {
    header('HTTP/1.1 400 Bad Request');
    echo 'Could not fetch the requested URL.';
} else {
    // Uncomment to print the trace log for debugging.
    //echo $h->log;
    echo $h->contents;
}
?>
$ttl 为快照文件的更新周期(单位:秒,默认 10 秒);其中的 URL 补全函数借用了论坛里一位朋友的原创代码。
类中如有不足之处,希望大家继续将它完善下去。
if(preg_match("/(.*)(href|src)=(.+?)( |/>|>).*/i",$l1,$regs)){$I2 = $regs[3];}
if(strlen($I2)>0){
$I1 = str_replace(chr(34),"",$I2);
$I1 = str_replace(chr(39),"",$I1);
}else{return $l1;}
$url_parsed = parse_url($l2);
$scheme = $url_parsed["scheme"];if($scheme!=""){$scheme = $scheme."://";}
$host = $url_parsed["host"];
$l3 = $scheme.$host;
if(strlen($l3)==0){return $l1;}
$path = dirname($url_parsed["path"]);if($path[0]=="\"){$path="";}
$pos = strpos($I1,"#");
if($pos>0) $I1 = substr($I1,0,$pos);
//判断类型
if(preg_match("/^(http|https|ftp):(//|\\)(([w/\+-~`@:%])+.)+([w/\.=?+-~`@':!%#]|(&)|&)+/i",$I1)){return $l1; }//http开头的url类型要跳过
elseif($I1[0]=="/"){$I1 = $l3.$I1;}//绝对路径
elseif(substr($I1,0,3)=="../"){//相对路径
while(substr($I1,0,3)=="../"){
$I1 = substr($I1,strlen($I1)-(strlen($I1)-3),strlen($I1)-3);
if(strlen($path)>0){
$path = dirname($path);
}
}
$I1 = $l3.$path."/".$I1;
}
elseif(substr($I1,0,2)=="./"){
$I1 = $l3.$path.substr($I1,strlen($I1)-(strlen($I1)-1),strlen($I1)-1);
}
elseif(strtolower(substr($I1,0,7))=="mailto:"||strtolower(substr($I1,0,11))=="javascript:"){
return $l1;
}else{
$I1 = $l3.$path."/".$I1;
}
return str_replace($I2,"\"$I1\"",$l1);
}
}
?>
用法(test.php):
CODE:
<?php
// Example front end: prints the snapshot of the page named by ?url=...
require_once(dirname(__FILE__).'/snap.class.php');
// Treat a missing or non-string "url" parameter as "no URL given" instead
// of raising an undefined-index notice.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : '';
$h = new snap();
// fetch() returns false when the URL is rejected.
if ($h->fetch($url) === false) {
    header('HTTP/1.1 400 Bad Request');
    echo 'Could not fetch the requested URL.';
} else {
    // Uncomment to print the trace log for debugging.
    //echo $h->log;
    echo $h->contents;
}
?>
$ttl 为快照文件的更新周期(单位:秒,默认 10 秒);其中的 URL 补全函数借用了论坛里一位朋友的原创代码。
类中如有不足之处,希望大家继续将它完善下去。
作者: samsung 发布时间: 2007-07-09
相关阅读 更多
热门阅读
-
office 2019专业增强版最新2021版激活秘钥/序列号/激活码推荐 附激活工具
阅读:74
-
如何安装mysql8.0
阅读:31
-
Word快速设置标题样式步骤详解
阅读:28
-
20+道必知必会的Vue面试题(附答案解析)
阅读:37
-
HTML如何制作表单
阅读:22
-
百词斩可以改天数吗?当然可以,4个步骤轻松修改天数!
阅读:31
-
ET文件格式和XLS格式文件之间如何转化?
阅读:24
-
react和vue的区别及优缺点是什么
阅读:121
-
支付宝人脸识别如何关闭?
阅读:21
-
腾讯微云怎么修改照片或视频备份路径?
阅读:28