一个过滤超文本元素,并且允许指定的元素通过的Class
时间:2008-01-18
来源:互联网
即将离开PHP,把写过的一些有意思的代码发给大家,看看对大家有没有用
建立一个过滤器,只允许指定的超文本元素和特定的属性通过,并且能够对属性值也进行控制
这里的代码没有考虑OO的设计和封装性,主要还是考虑到效率的问题,所以想看设计的朋友,就不用看了。
实用主义者,直接拿来用吧:)
建立一个过滤器,只允许指定的超文本元素和特定的属性通过,并且能够对属性值也进行控制
这里的代码没有考虑OO的设计和封装性,主要还是考虑到效率的问题,所以想看设计的朋友,就不用看了。
实用主义者,直接拿来用吧:)
复制PHP内容到剪贴板
<?php
/**
* @copyright 2006 ajss
* @author 徐智 <[email protected]>
*
* $Id: myfilterHtml.class.php 667 2007-07-03 04:48:47Z xuzhi $
*/
/**
 * Usage example.
 *
 * The allow-map is keyed by element name; each entry maps the permitted
 * attribute names either to 0 (any value accepted) or to a list of keywords
 * that must not occur in the attribute value.
 */
$value = "<a href='a'>";
$obj = new myfilterHtml(
    trim($value),
    [
        'center' => ['center' => 0],
        'br'     => ['br' => 0],
        'font'   => ['font' => 0, 'size' => 0, 'style' => 0],
        'a'      => ['a' => 0, 'href' => 0, 'title' => 0],
        'img'    => [
            'img'    => 0,
            'src'    => ['javascript'],
            'border' => 0,
            'width'  => 0,
            'height' => 0,
            'class'  => 0,
            'alt'    => 0,
            'title'  => 0,
        ],
    ]
);
$value = $obj->filterHtml();
/**
 * Whitelist-based HTML filter.
 *
 * The input is scanned for "<...>" tags. A tag is copied to the output
 * verbatim only when its element name and every valued attribute are listed in
 * the allow-map, its attribute values pass the per-attribute keyword blacklist,
 * and the tag is either self-closing ("<br/>") or is closed by a matching
 * closing tag while it is the innermost open element. Everything else,
 * including rejected or unbalanced tags, is escaped with htmlentities().
 *
 * Element and attribute names are compared case-sensitively with the keys of
 * the allow-map.
 */
class myfilterHtml {
    const space = ' ';
    const quotes = '"';
    const quote = "'";
    const equ = "=";

    /** @var string Text being filtered. */
    private $html = '';

    /**
     * @var array Allow-map, e.g.
     *            array('font' => array('style' => 0, 'size' => 0))
     */
    private $allow = array();

    /** @var int Character offset at which the next tag search starts. */
    private $pos = 0;

    /** @var string Encoding name handed to the mb_* functions and htmlentities(). */
    private $ed = '';

    /** @var int[] Indexes into $htmlTags of accepted opening tags still waiting for their closing tag. */
    private $htmlStack = array();

    /** @var array[] Accepted tags in document order: array(name, start offset, end offset, emit verbatim?). */
    private $htmlTags = array();

    /**
     * Builds a filter that lets only the listed elements through.
     *
     * @param string $html  Text to filter.
     * @param array  $allow Allowed elements. Key: element name. Value: array of
     *                      allowed attribute names; an attribute mapped to 0
     *                      accepts any value, an attribute mapped to an array
     *                      lists keywords that must not occur in its value, e.g.
     *                      'img' => array('img' => 0, 'src' => array('javascript'), 'alt' => 0)
     * @param string $ed    Character encoding of $html.
     */
    public function __construct($html, $allow, $ed='utf-8') {
        $this->html = (string) $html;
        $this->allow = $allow;
        $this->ed = $ed;
    }

    /**
     * Runs the filter and returns the sanitized markup.
     *
     * The scan state is reset on every call, so calling this method repeatedly
     * yields the same result.
     *
     * @return string Input with accepted tags kept and everything else escaped.
     */
    public function filterHtml() {
        $this->pos = 0;
        $this->htmlStack = array();
        $this->htmlTags = array();

        // Walk every tag in the document (the scan used to stop after 10 tags).
        while ($this->nextHtmlTag() !== false) {
        }

        $html = $this->html;
        $ed = $this->ed;
        $p = 0;
        $buf = '';
        foreach ($this->htmlTags as $v) {
            if ($v[3] !== true) {
                continue; // opening tag that never found its closing tag
            }
            // Escape the text in front of the tag, then copy the tag itself.
            $buf .= htmlentities(mb_substr($html, $p, $v[1] - $p, $ed), ENT_QUOTES, $ed);
            $buf .= mb_substr($html, $v[1], $v[2] - $v[1] + 1, $ed);
            $p = $v[2] + 1;
        }
        return $buf . htmlentities(mb_substr($html, $p, mb_strlen($html, $ed) - $p, $ed), ENT_QUOTES, $ed);
    }

    /**
     * Locates the next "<...>" span after the current scan position and hands
     * it to pushHtml().
     *
     * @return bool false when no further complete tag exists, true otherwise.
     */
    private function nextHtmlTag() {
        $spos = mb_strpos($this->html, '<', $this->pos, $this->ed);
        if ($spos === false) return false;
        $this->pos = $spos;
        $epos = mb_strpos($this->html, '>', $spos, $this->ed);
        if ($epos === false) return false;
        $this->pos = $epos;
        return $this->pushHtml($spos, $epos);
    }

    /**
     * Classifies the tag spanning character offsets $s ('<') to $e ('>') and
     * records it when it is acceptable.
     *
     * @param int $s Offset of '<'.
     * @param int $e Offset of '>'.
     * @return bool Always true (the scan continues with the next tag).
     */
    private function pushHtml($s, $e) {
        $html = $this->html;
        $ed = $this->ed;
        $spos = $s + 1;

        if (mb_substr($html, $spos, 1, $ed) === '/') {
            // Closing tag: it is kept only if it closes the innermost open element.
            $name = mb_substr($html, $spos + 1, $e - $spos - 1, $ed);
            $top = end($this->htmlStack);
            if ($top !== false && $this->htmlTags[$top][0] === $name) {
                array_pop($this->htmlStack);
                $this->htmlTags[$top][3] = true;
                $this->htmlTags[] = array($name, $s, $e, true);
            }
            return true;
        }

        // Opening tag: the name runs up to the first space, or to the end of
        // the tag (minus a trailing '/') when the tag contains no space.
        $selfClosing = mb_substr($html, $e - 1, 1, $ed) === '/';
        $pos = mb_strpos($html, ' ', $spos, $ed);
        if ($pos === false || $pos > $e) {
            $pos = $selfClosing ? $e - 1 : $e;
        }
        $tag = array(mb_substr($html, $spos, $pos - $spos, $ed), $s, $e, false);

        if ($this->checkHtml($tag)) {
            // A self-closing tag is complete right away; any other tag waits
            // on the stack until its closing tag shows up.
            $tag[3] = $selfClosing;
            $this->htmlTags[] = $tag;
            if (!$selfClosing) {
                $this->htmlStack[] = count($this->htmlTags) - 1;
            }
        }
        return true;
    }

    /**
     * Validates an opening tag against the allow-map.
     *
     * The text between '<' and '>' is walked byte by byte with a small state
     * machine. $tag holds the state: '' (start / after a quoted value),
     * self::space (after a separator), self::equ (after '='), self::quote or
     * self::quotes (inside a quoted value) and 'u' (inside an unquoted value).
     * The tag is rejected as soon as something cannot be attributed to an
     * allowed attribute, so malformed markup is escaped rather than emitted.
     *
     * @param array $htmlTag array(name, start offset, end offset, flag).
     * @return bool true when the element and all its valued attributes are allowed.
     */
    private function checkHtml($htmlTag) {
        if (!isset($this->allow[$htmlTag[0]])) return false;
        $allow = $this->allow[$htmlTag[0]];
        if (!is_array($allow)) {
            $allow = array();
        }

        $buf = mb_substr($this->html, $htmlTag[1] + 1, $htmlTag[2] - $htmlTag[1] - 1, $this->ed);
        $len = strlen($buf);
        $unquoted = 'u';
        // Bytes that may not occur in an unquoted value: browsers end the value
        // at whitespace, so accepting these would let attributes slip through.
        $forbidden = "\"'`<\t\n\r\f\0";
        $pos = -1;
        $tag = '';
        $att = '';

        for ($i = 0; $i < $len; $i++) {
            $ch = $buf[$i];

            if ($tag === self::quote || $tag === self::quotes) {
                // Inside a quoted value only the matching quote is significant;
                // '=', spaces and the other quote character belong to the value.
                if ($ch === $tag) {
                    if (!$this->isValueAllowed($allow[$att], substr($buf, $pos + 1, $i - $pos - 1))) return false;
                    $att = '';
                    $pos = $i - 1; // lets a space right after the quote count as a separator
                    $tag = '';
                }
                continue;
            }

            if ($tag === $unquoted) {
                if ($ch === self::space) {
                    if (!$this->isValueAllowed($allow[$att], substr($buf, $pos, $i - $pos))) return false;
                    $att = '';
                    $pos = $i;
                    $tag = self::space;
                } elseif (strpos($forbidden, $ch) !== false) {
                    return false;
                }
                continue;
            }

            switch ($ch) {
                case self::space:
                    if ($tag === self::equ || $tag === self::space) break;
                    if ($pos >= $i - 1) break;
                    $pos = $i;
                    $tag = self::space;
                    break;
                case self::quote:
                case self::quotes:
                    // A quote is only legal as the start of a value, i.e. after "name=".
                    if ($att === '' || $tag !== self::equ) return false;
                    $tag = $ch;
                    $pos = $i;
                    break;
                case self::equ:
                    // '=' must follow a space-delimited, non-empty attribute name.
                    if ($tag !== self::space || $pos >= $i - 1) return false;
                    $att = trim(substr($buf, $pos + 1, $i - $pos - 1));
                    if ($att === '' || !isset($allow[$att])) return false;
                    $tag = self::equ;
                    $pos = $i;
                    break;
                default:
                    if ($tag === self::equ) {
                        // First byte of an unquoted value.
                        if (strpos($forbidden, $ch) !== false) return false;
                        $tag = $unquoted;
                        $pos = $i;
                    }
            }
        }

        // An unterminated quote would swallow the markup that follows the tag.
        if ($tag === self::quote || $tag === self::quotes) return false;
        if ($tag === $unquoted) {
            return $this->isValueAllowed($allow[$att], substr($buf, $pos));
        }
        return true;
    }

    /**
     * Checks an attribute value against the attribute's keyword blacklist.
     *
     * The value is tested as written and again after decoding character
     * references and removing whitespace/control bytes, which catches simple
     * obfuscations such as "java&#115;cript:". This is a best-effort keyword
     * check, not a URL-scheme validator.
     *
     * @param mixed  $rule  Allow-map entry of the attribute: an array of
     *                      forbidden keywords; any other value (normally 0)
     *                      accepts every value.
     * @param string $value Raw attribute value.
     * @return bool true when no forbidden keyword occurs in the value.
     */
    private function isValueAllowed($rule, $value) {
        if (!is_array($rule)) {
            return true;
        }
        $raw = trim($value);
        $decoded = html_entity_decode($raw, ENT_QUOTES | ENT_HTML5, $this->ed);
        $compact = preg_replace('/[\x00-\x20]+/', '', $decoded);
        if ($compact === null) {
            $compact = $decoded;
        }
        foreach ($rule as $needle) {
            if (!is_string($needle) || $needle === '') {
                continue;
            }
            if (stripos($raw, $needle) !== false || stripos($compact, $needle) !== false) {
                return false;
            }
        }
        return true;
    }
}
?>
[ 本帖最后由 xz1980 于 2008-1-20 16:25 编辑 ] PHP代码:
<?php
/**
* @copyright 2006 ajss
* @author 徐智 <[email protected]>
*
* $Id: myfilterHtml.class.php 667 2007-07-03 04:48:47Z xuzhi $
*/
/**
 * Usage example.
 *
 * The allow-map is keyed by element name; each entry maps the permitted
 * attribute names either to 0 (any value accepted) or to a list of keywords
 * that must not occur in the attribute value.
 */
$value = "<a href='a'>";
$obj = new myfilterHtml(
    trim($value),
    [
        'center' => ['center' => 0],
        'br'     => ['br' => 0],
        'font'   => ['font' => 0, 'size' => 0, 'style' => 0],
        'a'      => ['a' => 0, 'href' => 0, 'title' => 0],
        'img'    => [
            'img'    => 0,
            'src'    => ['javascript'],
            'border' => 0,
            'width'  => 0,
            'height' => 0,
            'class'  => 0,
            'alt'    => 0,
            'title'  => 0,
        ],
    ]
);
$value = $obj->filterHtml();
/**
 * Whitelist-based HTML filter.
 *
 * The input is scanned for "<...>" tags. A tag is copied to the output
 * verbatim only when its element name and every valued attribute are listed in
 * the allow-map, its attribute values pass the per-attribute keyword blacklist,
 * and the tag is either self-closing ("<br/>") or is closed by a matching
 * closing tag while it is the innermost open element. Everything else,
 * including rejected or unbalanced tags, is escaped with htmlentities().
 *
 * Element and attribute names are compared case-sensitively with the keys of
 * the allow-map.
 */
class myfilterHtml {
    const space = ' ';
    const quotes = '"';
    const quote = "'";
    const equ = "=";

    /** @var string Text being filtered. */
    private $html = '';

    /**
     * @var array Allow-map, e.g.
     *            array('font' => array('style' => 0, 'size' => 0))
     */
    private $allow = array();

    /** @var int Character offset at which the next tag search starts. */
    private $pos = 0;

    /** @var string Encoding name handed to the mb_* functions and htmlentities(). */
    private $ed = '';

    /** @var int[] Indexes into $htmlTags of accepted opening tags still waiting for their closing tag. */
    private $htmlStack = array();

    /** @var array[] Accepted tags in document order: array(name, start offset, end offset, emit verbatim?). */
    private $htmlTags = array();

    /**
     * Builds a filter that lets only the listed elements through.
     *
     * @param string $html  Text to filter.
     * @param array  $allow Allowed elements. Key: element name. Value: array of
     *                      allowed attribute names; an attribute mapped to 0
     *                      accepts any value, an attribute mapped to an array
     *                      lists keywords that must not occur in its value, e.g.
     *                      'img' => array('img' => 0, 'src' => array('javascript'), 'alt' => 0)
     * @param string $ed    Character encoding of $html.
     */
    public function __construct($html, $allow, $ed='utf-8') {
        $this->html = (string) $html;
        $this->allow = $allow;
        $this->ed = $ed;
    }

    /**
     * Runs the filter and returns the sanitized markup.
     *
     * The scan state is reset on every call, so calling this method repeatedly
     * yields the same result.
     *
     * @return string Input with accepted tags kept and everything else escaped.
     */
    public function filterHtml() {
        $this->pos = 0;
        $this->htmlStack = array();
        $this->htmlTags = array();

        // Walk every tag in the document (the scan used to stop after 10 tags).
        while ($this->nextHtmlTag() !== false) {
        }

        $html = $this->html;
        $ed = $this->ed;
        $p = 0;
        $buf = '';
        foreach ($this->htmlTags as $v) {
            if ($v[3] !== true) {
                continue; // opening tag that never found its closing tag
            }
            // Escape the text in front of the tag, then copy the tag itself.
            $buf .= htmlentities(mb_substr($html, $p, $v[1] - $p, $ed), ENT_QUOTES, $ed);
            $buf .= mb_substr($html, $v[1], $v[2] - $v[1] + 1, $ed);
            $p = $v[2] + 1;
        }
        return $buf . htmlentities(mb_substr($html, $p, mb_strlen($html, $ed) - $p, $ed), ENT_QUOTES, $ed);
    }

    /**
     * Locates the next "<...>" span after the current scan position and hands
     * it to pushHtml().
     *
     * @return bool false when no further complete tag exists, true otherwise.
     */
    private function nextHtmlTag() {
        $spos = mb_strpos($this->html, '<', $this->pos, $this->ed);
        if ($spos === false) return false;
        $this->pos = $spos;
        $epos = mb_strpos($this->html, '>', $spos, $this->ed);
        if ($epos === false) return false;
        $this->pos = $epos;
        return $this->pushHtml($spos, $epos);
    }

    /**
     * Classifies the tag spanning character offsets $s ('<') to $e ('>') and
     * records it when it is acceptable.
     *
     * @param int $s Offset of '<'.
     * @param int $e Offset of '>'.
     * @return bool Always true (the scan continues with the next tag).
     */
    private function pushHtml($s, $e) {
        $html = $this->html;
        $ed = $this->ed;
        $spos = $s + 1;

        if (mb_substr($html, $spos, 1, $ed) === '/') {
            // Closing tag: it is kept only if it closes the innermost open element.
            $name = mb_substr($html, $spos + 1, $e - $spos - 1, $ed);
            $top = end($this->htmlStack);
            if ($top !== false && $this->htmlTags[$top][0] === $name) {
                array_pop($this->htmlStack);
                $this->htmlTags[$top][3] = true;
                $this->htmlTags[] = array($name, $s, $e, true);
            }
            return true;
        }

        // Opening tag: the name runs up to the first space, or to the end of
        // the tag (minus a trailing '/') when the tag contains no space.
        $selfClosing = mb_substr($html, $e - 1, 1, $ed) === '/';
        $pos = mb_strpos($html, ' ', $spos, $ed);
        if ($pos === false || $pos > $e) {
            $pos = $selfClosing ? $e - 1 : $e;
        }
        $tag = array(mb_substr($html, $spos, $pos - $spos, $ed), $s, $e, false);

        if ($this->checkHtml($tag)) {
            // A self-closing tag is complete right away; any other tag waits
            // on the stack until its closing tag shows up.
            $tag[3] = $selfClosing;
            $this->htmlTags[] = $tag;
            if (!$selfClosing) {
                $this->htmlStack[] = count($this->htmlTags) - 1;
            }
        }
        return true;
    }

    /**
     * Validates an opening tag against the allow-map.
     *
     * The text between '<' and '>' is walked byte by byte with a small state
     * machine. $tag holds the state: '' (start / after a quoted value),
     * self::space (after a separator), self::equ (after '='), self::quote or
     * self::quotes (inside a quoted value) and 'u' (inside an unquoted value).
     * The tag is rejected as soon as something cannot be attributed to an
     * allowed attribute, so malformed markup is escaped rather than emitted.
     *
     * @param array $htmlTag array(name, start offset, end offset, flag).
     * @return bool true when the element and all its valued attributes are allowed.
     */
    private function checkHtml($htmlTag) {
        if (!isset($this->allow[$htmlTag[0]])) return false;
        $allow = $this->allow[$htmlTag[0]];
        if (!is_array($allow)) {
            $allow = array();
        }

        $buf = mb_substr($this->html, $htmlTag[1] + 1, $htmlTag[2] - $htmlTag[1] - 1, $this->ed);
        $len = strlen($buf);
        $unquoted = 'u';
        // Bytes that may not occur in an unquoted value: browsers end the value
        // at whitespace, so accepting these would let attributes slip through.
        $forbidden = "\"'`<\t\n\r\f\0";
        $pos = -1;
        $tag = '';
        $att = '';

        for ($i = 0; $i < $len; $i++) {
            $ch = $buf[$i];

            if ($tag === self::quote || $tag === self::quotes) {
                // Inside a quoted value only the matching quote is significant;
                // '=', spaces and the other quote character belong to the value.
                if ($ch === $tag) {
                    if (!$this->isValueAllowed($allow[$att], substr($buf, $pos + 1, $i - $pos - 1))) return false;
                    $att = '';
                    $pos = $i - 1; // lets a space right after the quote count as a separator
                    $tag = '';
                }
                continue;
            }

            if ($tag === $unquoted) {
                if ($ch === self::space) {
                    if (!$this->isValueAllowed($allow[$att], substr($buf, $pos, $i - $pos))) return false;
                    $att = '';
                    $pos = $i;
                    $tag = self::space;
                } elseif (strpos($forbidden, $ch) !== false) {
                    return false;
                }
                continue;
            }

            switch ($ch) {
                case self::space:
                    if ($tag === self::equ || $tag === self::space) break;
                    if ($pos >= $i - 1) break;
                    $pos = $i;
                    $tag = self::space;
                    break;
                case self::quote:
                case self::quotes:
                    // A quote is only legal as the start of a value, i.e. after "name=".
                    if ($att === '' || $tag !== self::equ) return false;
                    $tag = $ch;
                    $pos = $i;
                    break;
                case self::equ:
                    // '=' must follow a space-delimited, non-empty attribute name.
                    if ($tag !== self::space || $pos >= $i - 1) return false;
                    $att = trim(substr($buf, $pos + 1, $i - $pos - 1));
                    if ($att === '' || !isset($allow[$att])) return false;
                    $tag = self::equ;
                    $pos = $i;
                    break;
                default:
                    if ($tag === self::equ) {
                        // First byte of an unquoted value.
                        if (strpos($forbidden, $ch) !== false) return false;
                        $tag = $unquoted;
                        $pos = $i;
                    }
            }
        }

        // An unterminated quote would swallow the markup that follows the tag.
        if ($tag === self::quote || $tag === self::quotes) return false;
        if ($tag === $unquoted) {
            return $this->isValueAllowed($allow[$att], substr($buf, $pos));
        }
        return true;
    }

    /**
     * Checks an attribute value against the attribute's keyword blacklist.
     *
     * The value is tested as written and again after decoding character
     * references and removing whitespace/control bytes, which catches simple
     * obfuscations such as "java&#115;cript:". This is a best-effort keyword
     * check, not a URL-scheme validator.
     *
     * @param mixed  $rule  Allow-map entry of the attribute: an array of
     *                      forbidden keywords; any other value (normally 0)
     *                      accepts every value.
     * @param string $value Raw attribute value.
     * @return bool true when no forbidden keyword occurs in the value.
     */
    private function isValueAllowed($rule, $value) {
        if (!is_array($rule)) {
            return true;
        }
        $raw = trim($value);
        $decoded = html_entity_decode($raw, ENT_QUOTES | ENT_HTML5, $this->ed);
        $compact = preg_replace('/[\x00-\x20]+/', '', $decoded);
        if ($compact === null) {
            $compact = $decoded;
        }
        foreach ($rule as $needle) {
            if (!is_string($needle) || $needle === '') {
                continue;
            }
            if (stripos($raw, $needle) !== false || stripos($compact, $needle) !== false) {
                return false;
            }
        }
        return true;
    }
}
?>
作者: xz1980 发布时间: 2008-01-18
应鼓励原创精品。
另,不要离开PHP!
另,不要离开PHP!

作者: luzhou 发布时间: 2008-01-19
过滤超文本元素很复杂吗,我还在用几个简单的函数过滤
作者: gamaray 发布时间: 2008-02-02


作者: luzhou 发布时间: 2008-02-05
引用:
原帖由 luzhou 于 2008-1-19 08:58 发表:应鼓励原创精品。
另,不要离开PHP!

作者: psdshow 发布时间: 2008-02-05

作者: luzhou 发布时间: 2008-02-06
相关阅读 更多
热门阅读
-
office 2019专业增强版最新2021版激活秘钥/序列号/激活码推荐 附激活工具
阅读:74
-
如何安装mysql8.0
阅读:31
-
Word快速设置标题样式步骤详解
阅读:28
-
20+道必知必会的Vue面试题(附答案解析)
阅读:37
-
HTML如何制作表单
阅读:22
-
百词斩可以改天数吗?当然可以,4个步骤轻松修改天数!
阅读:31
-
ET文件格式和XLS格式文件之间如何转化?
阅读:24
-
react和vue的区别及优缺点是什么
阅读:121
-
支付宝人脸识别如何关闭?
阅读:21
-
腾讯微云怎么修改照片或视频备份路径?
阅读:28