|
这篇文章主要介绍如何通过删除 HTML 标签得到纯文本,该方法可以处理嵌套的标签,需要的朋友可以参考一下。
方法基本上来自 ThinkPHP 的源码,但是被我修改了一下。 复制代码 代码如下: <?php /* *@Description:删除HTML标签,得到纯文本。可以处理嵌套的标签 * */ class deleteHtmlTags{
/**
 * Path of the input file whose content is stripped of HTML tags.
 */
private $filename;

/**
 * Store the path of the file to process.
 *
 * @param string $filename Path of the input file; when omitted, the
 *                         hard-coded sample location below is used.
 */
public function __construct($filename = 'C:/AppServ/www/text.txt')
{
    $this->filename = $filename;
}
/**
 * Strip HTML tags from the file content and return the remaining plain text.
 *
 * Every span from the first '<' up to the first '>' is removed, so tag names
 * and attribute values disappear together with the brackets. A '>' that shows
 * up before the next '<' (or when no '<' is left at all) is treated as the
 * tail of a nested tag: everything up to and including it is dropped. That is
 * how nested input such as `<a <b> c>text` collapses to `text`; the trade-off
 * is that a literal '>' in ordinary text also removes the text in front of it.
 * Content that contains no '>' is returned unchanged.
 *
 * @access public
 * @return string The content obtained from contentGet() with the tags removed.
 */
public function deletehtmltags()
{
    $content = $this->contentGet();

    // Each pass removes exactly one '>' from $content, so the loop always ends.
    while (($currentEnd = strpos($content, '>')) !== false) {
        $currentBegin = strpos($content, '<');

        // A '>' with no '<' in front of it cannot close a tag that starts in
        // the remaining text. Cutting from the start of the string keeps the
        // behaviour the method already had when no '<' was left, and fixes
        // the case where the '<' came later: there the prefix and suffix
        // overlapped, the string grew on every pass and the loop never ended.
        if ($currentBegin === false || $currentBegin > $currentEnd) {
            $currentBegin = 0;
        }

        // Earlier variant, left commented out on purpose: it re-inserted the
        // text between the brackets ($cha was $currentEnd - $currentBegin - 1),
        // i.e. it removed only '<' and '>' instead of the whole tag.
        // $tmpStringMiddle = @ substr($content, $currentBegin + 1, $cha);
        // $content = $tmpStringBegin.$tmpStringMiddle.$tmpStringEnd;
        $content = substr($content, 0, $currentBegin) . substr($content, $currentEnd + 1);
    }

    return $content;
}
/**
 * Read the whole input file into a string.
 *
 * An empty file yields an empty string. The previous fopen()/fread() pair
 * asked fread() for filesize() bytes, which is an invalid length of 0 for an
 * empty file, and never checked whether the file could be opened at all.
 *
 * @return string The raw file content.
 * @throws \RuntimeException When the file does not exist or cannot be read.
 */
private function contentGet()
{
    if (!is_file($this->filename) || !is_readable($this->filename)) {
        throw new \RuntimeException('Cannot read input file: ' . $this->filename);
    }

    $content = file_get_contents($this->filename);
    if ($content === false) {
        throw new \RuntimeException('Failed to read input file: ' . $this->filename);
    }

    return $content;
}
} // end of class deleteHtmlTags
// Demo driver: strip the tags from the default input file and print the text.
$deleteHtml = new deleteHtmlTags();
$content = $deleteHtml->deletehtmltags();
print($content);
?> 修改部分也在上面,只是注释掉了。个人觉得这个方法比用正则这类的方法更好。
|
|