PHP 高效内容内链替换

PHP 高效内容内链替换

/**
 * Rebuild the key-link list cache from the database.
 *
 * Reads up to 20000 rows (title, url, murl) from the keylink table, writes
 * them to the 'keylink-all.php' cache and, when requested, also rebuilds the
 * trie dictionary cache via cache_batchreplace().
 *
 * @param int|bool $flag When truthy, also regenerate the trie dictionary cache.
 * @return array List of rows as returned by $db->fetch_array(); empty array when the table has no rows.
 */
function cache_keylink($flag = 1) {
    global $db;
    // Initialise the result list up front: with an empty table the loop body
    // never runs and an undefined variable would otherwise be cached and returned.
    $keylinks = [];
    $result = $db->query("SELECT title,url,murl FROM {$db->pre}keylink ORDER BY listorder DESC,itemid DESC limit 20000");
    while ($r = $db->fetch_array($result)) {
        $keylinks[] = $r;
    }
    cache_write('keylink-all.php', $keylinks);
    // Optionally regenerate the trie dictionary cache from the fresh rows.
    if ($flag) {
        cache_batchreplace($keylinks);
    }
    return $keylinks;
}


/**
 * Build the trie dictionary from the key-link titles and cache it.
 *
 * The key-link rows are taken from the argument, else from the
 * 'keylink-all.php' cache, else reloaded through cache_keylink(0).
 * The serialized trie (gz-compressed when zlib is available) is written
 * to the 'triefilter.php' cache.
 *
 * @param array $keylinks Rows containing at least a 'title' column; may be empty.
 * @return triefilter The freshly built trie.
 */
function cache_batchreplace($keylinks = [])
{
    $keylinks || $keylinks = cache_read('keylink-all.php');
    if (!$keylinks) {
        // Pass 0 so cache_keylink() does not call back into this function.
        $keylinks = cache_keylink(0);
    }
    // array_column() requires an array; tolerate a loader that returned nothing usable.
    if (!is_array($keylinks)) {
        $keylinks = [];
    }
    require_once DT_ROOT.'/include/batchreplace/triefilter.class.php';
    $tf = new triefilter(array_column($keylinks, 'title'));
    // Serialize once and compress that same string when zlib is available.
    $data = serialize($tf);
    if (function_exists('gzcompress')) {
        $data = gzcompress($data, 3);
    }
    cache_write('triefilter.php', $data);
    return $tf;
}

类文件1:triefilter.class.php

<?php
require_once DT_ROOT.'/include/batchreplace/trienode.class.php';
/**
 * Trie-based keyword matcher and replacer.
 *
 * The instance itself is the root node of the trie (it extends trienode).
 * Each keyword is stored as a path of single characters; the node reached by
 * the last character of a keyword has m_end set to true.
 *
 * Relies on the external helper strsplit(), which is not defined in this class
 * (presumably it splits a string into an array of characters - confirm).
 */
class triefilter extends trienode
{
    /** @var bool Whether characters are lower-cased before being stored and looked up. */
    private $ignorecase = false;

    /**
     * @param array $keys   Keywords to insert into the trie.
     * @param bool  $isgore When true, characters are passed through strtolower().
     */
    public function __construct($keys, $isgore = false)
    {
        parent::__construct();
        $this->ignorecase = $isgore;
        $this->AddKey($keys);
    }

    /**
     * Insert every keyword of $keys into the trie.
     *
     * @param array $keys Keywords; empty entries are skipped.
     */
    private function AddKey($keys)
    {
        foreach ($keys as $key) {
            // Skip an empty keyword but keep processing the remaining ones
            // (returning here would silently drop every keyword after it).
            if (empty($key)) {
                continue;
            }
            $node = $this;
            foreach (strsplit($key) as $v) {
                $c = $this->GetChar($v);
                if (!$node->TryGetValue($c, $subnode)) {
                    $subnode = new trienode();
                    $node->m_values[$c] = $subnode;
                }
                $node = $subnode;
            }
            $node->m_end = true;
        }
    }

    /**
     * Normalise one character for trie storage/lookup.
     *
     * @param string $car A single character.
     * @return string The character, lower-cased when ignorecase is enabled.
     */
    private function GetChar($car)
    {
        if ($this->ignorecase) {
            return strtolower($car);
        }
        return $car;
    }

    /**
     * Replace keywords found in $text with their entries from $d.
     *
     * NOTE(review): the inner scan starts at the character after the match
     * start and only tests m_end after advancing, so one-character keywords
     * are never matched.
     * NOTE(review): every matched key is recorded in $rekeyarr and any later
     * match contained in a recorded key is skipped, so a key appears to be
     * replaced at most once regardless of $single_num - confirm intent.
     *
     * @param string $text        Text to scan.
     * @param array  $d           Map keyword => replacement string.
     * @param int    $single_num  Per-keyword replacement limit; 0 disables the per-keyword counter.
     * @param bool   $excludehtml When true, a match is skipped unless the text before it has
     *                            equal counts of "<a" and "</a>" and of "<" and ">".
     * @param int    $num         Total number of replacements after which the result is returned.
     * @return string The text with replacements applied.
     */
    public function Replace($text, $d, $single_num = 1, $excludehtml = true, $num = 3)
    {
        $ori = $text;

        // Length difference accumulated by earlier replacements; used to map
        // positions in $text onto positions in $ori.
        $onlysize = 0;
        $length = mb_strlen($text);
        $textArr = strsplit($text);
        $count_num = 0;
        $count_word_num = [];
        $rekeyarr = [];
        foreach ($textArr as $i => $v) {
            $node = null;

            if ($this->TryGetValue($this->GetChar($v), $node)) {
                for ($j = $i + 1; $j < $length; $j++) {
                    if ($node->TryGetValue($this->GetChar($textArr[$j]), $node)) {
                        if ($node->m_end) {
                            // Prefer the longer keyword: keep scanning while the next
                            // character continues a path below this node.
                            if (count($node->m_values) > 0 && $length > $j + 1 && array_key_exists($this->GetChar($textArr[$j + 1]), $node->m_values)) {
                                continue;
                            }

                            $isin = $excludehtml;
                            if ($excludehtml) {
                                $start = mb_substr($text, 0, $i);
                                if (substr_count($start, "<a") == substr_count($start, "</a>") && substr_count($start, "<") == substr_count($start, ">")) {
                                    $isin = false;
                                }
                            }

                            if (!$isin) {
                                $key = mb_substr($text, $i, $j + 1 - $i);
                                // Prevent overlapping/duplicate replacement: skip a key that is
                                // contained in a key already handled (e.g. a short keyword that
                                // is the tail of a longer keyword matched earlier).
                                foreach ($rekeyarr as $rev) {
                                    if (stripos($rev, $key) !== false) continue 2;
                                }
                                $rekeyarr[] = $key;
                                if (!empty($d[$key])) {
                                    $mvalue = $d[$key];
                                    $ori = mb_substr($ori, 0, $i + $onlysize) . $mvalue . mb_substr($ori, $j + 1 + $onlysize);
                                    $onlysize += mb_strlen($mvalue) - mb_strlen($key);
                                    $count_num++;
                                    if ($count_num >= $num) {
                                        return $ori;
                                    }
                                    if ($single_num) {
                                        if (isset($count_word_num[$key])) {
                                            $count_word_num[$key] = $count_word_num[$key] + 1;
                                        } else {
                                            $count_word_num[$key] = 1;
                                        }
                                        // Blank the dictionary entry once the per-keyword
                                        // counter exceeds the limit.
                                        if ($count_word_num[$key] > $single_num) {
                                            $d[$key] = "";
                                        }
                                    }
                                }
                            }
                        }

                        // The scan reached the last character of the text.
                        if ($j + 1 >= $length) {
                            return $ori;
                        }
                    } else {
                        if ($j + 1 >= $length) {
                            return $ori;
                        }
                        break;
                    }
                }
            }
        }
        return $ori;
    }
}
?>

类文件2:trienode.class.php
<?php
/**
 * One node of the keyword trie: an end-of-keyword flag plus a map from a
 * character to the child node reached through it.
 */
class trienode
{
    /** @var bool True when a keyword ends at this node. */
    public $m_end;
    /** @var array Child nodes keyed by character. */
    public $m_values;

    public function __construct()
    {
        $this->m_values = [];
        $this->m_end = false;
    }

    /**
     * Look up the child stored under $key.
     *
     * @param mixed $key  Key to look up in m_values.
     * @param mixed $node Receives the child when found; left untouched otherwise.
     * @return bool True when a child exists under $key.
     */
    public function TryGetValue($key, &$node)
    {
        $found = array_key_exists($key, $this->m_values);
        if ($found) {
            $node = $this->m_values[$key];
        }
        return $found;
    }

    /**
     * Test $text against a list of percent-encoded punctuation sequences
     * (plus a literal hyphen, underscore and dot).
     *
     * @param string $text Text to test.
     * @return int|false The preg_match() result.
     */
    public function is_biaodian($text)
    {
        $pattern = "/(%7E|%60|%21|%40|%23|%24|%25|%5E|%26|%27|%2A|%28|%29|%2B|%7C|%5C|%3D|\-|_|%5B|%5D|%7D|%7B|%3B|%22|%3A|%3F|%3E|%3C|%2C|\.|%2F|%A3%BF|%A1%B7|%A1%B6|%A1%A2|%A1%A3|%A3%AC|%7D|%A1%B0|%A3%BA|%A3%BB|%A1%AE|%A1%AF|%A1%B1|%A3%FC|%A3%BD|%A1%AA|%A3%A9|%A3%A8|%A1%AD|%A3%A4|%A1%A4|%A3%A1|%E3%80%82|%EF%BC%81|%EF%BC%8C|%EF%BC%9B|%EF%BC%9F|%EF%BC%9A|%E3%80%81|%E2%80%A6%E2%80%A6|%E2%80%9D|%E2%80%9C|%E2%80%98|%E2%80%99|%EF%BD%9E|%EF%BC%8E|%EF%BC%88)+/";
        return preg_match($pattern, $text);
    }

}
?>

 

/**
 * Efficient batch keyword replacement backed by the cached trie dictionary.
 *
 * @param string $text        Text to process.
 * @param array  $words       Replacement dictionary, e.g. ['foo' => 'bar', 'baz' => 'qux']
 *                            replaces "foo" with "bar" and "baz" with "qux".
 * @param int    $single_num  Per-keyword replacement limit, forwarded to triefilter::Replace().
 * @param bool   $excludehtml Whether to skip matches located inside markup/anchors, e.g. for
 *                            <a title="foo ...">baz ...</a>; mainly useful when the
 *                            replacements themselves are links.
 * @param int    $num         Total number of replacements, forwarded to triefilter::Replace().
 * @return string The processed text.
 */
function replace_batch($text, $words, $single_num = 3, $excludehtml = true, $num = 3)
{
    // Nothing can be replaced without text or without a dictionary, so skip
    // loading and unserializing the trie entirely.
    if (!$words || $text === '' || $text === null) {
        return $text;
    }
    require_once DT_ROOT.'/include/batchreplace/triefilter.class.php';
    $tf = null;
    $raw = cache_read('triefilter.php', '', 1);
    if ($raw) {
        if (function_exists('gzcompress')) {
            // The cache is written gz-compressed when zlib is available.
            $raw = gzuncompress($raw);
        }
        // gzuncompress() returns false on corrupt data; only unserialize a real string.
        if (is_string($raw)) {
            $tf = unserialize($raw);
        }
    }
    // Missing, corrupt or foreign cache content: rebuild the trie instead of
    // calling a method on a non-object.
    if (!($tf instanceof triefilter)) {
        $tf = cache_batchreplace();
    }
    return $tf->Replace($text, $words, $single_num, $excludehtml, $num);
}

根据内容进行关键词内链替换:
/**
 * Turn keywords found in $content into links using the cached key-link list.
 *
 * @param string $content Content to process.
 * @param int    $item    Not used by this function.
 * @param string $pamra   'pc' selects the 'url' column; any other value selects 'murl'.
 * @param int    $num     NOTE(review): currently not used - the limits passed to
 *                        replace_batch() are the fixed values 2 and 3; confirm intent.
 * @return string The content with keyword links inserted, or the content unchanged
 *                when no key links are available.
 */
function keylink($content, $item = 0, $pamra='pc', $num = 4) {
    global $KEYLINK;
    $KEYLINK or $KEYLINK = cache_read('keylink-all.php');
    if (!$KEYLINK) return $content;
    $field = $pamra == 'pc' ? 'url' : 'murl';
    // Build the dictionary with a plain loop: removing elements from the array
    // inside an array_walk() callback is documented as undefined behaviour.
    $words = [];
    foreach (array_column($KEYLINK, $field, 'title') as $title => $url) {
        if (empty($url)) {
            continue;
        }
        $words[$title] = '<a href="'.$url.'" target="_blank" class="keylink">'.$title.'</a>';
    }
    // No keyword has a usable link for this channel: nothing to replace.
    if (!$words) return $content;
    return replace_batch($content, $words, 2, true, 3);
}

posted @ 2021-08-04 17:47  飞鹰之歌  阅读(200)  评论(0)    收藏  举报