discuz在线分词提取关键字、tag标签
discuz有个很不错的在线分词系统: http://keyword.discuz.com/related_kw.html?title=$qc_rows&ics=utf-8&ocs=utf-8 (其中 $qc_rows 为待分词的文本),
利用它可以为文章提取关键字和标签.
直接看PHP参考代码吧1:
/**
 * Extract up to five keywords for a piece of (HTML) text by querying the
 * Discuz online word-segmentation service.
 *
 * @param string $contents Article text; HTML tags are stripped before use.
 *                         Assumed to be UTF-8, because the request declares
 *                         ics=utf-8 / ocs=utf-8.
 * @return string Keywords, each followed by a comma (e.g. "foo,bar,"), or an
 *                empty string when there is nothing to send, the request
 *                fails, or no keyword is found.
 */
function zhgetkey($contents){
    // Tokens removed from the text before it is sent. "\xC2\xA0" (no-break
    // space) and "\xE3\x80\x80" (ideographic space) are listed explicitly;
    // the original list also held "\s", which in a PHP double-quoted string
    // is a literal backslash-s rather than "any whitespace", so it is dropped.
    $strip = array(' ', "\xC2\xA0", "\xE3\x80\x80", "\r\n", "\n", "\r", "\t", '>', '“', '”', '<br />');
    $text = str_replace($strip, '', strip_tags((string) $contents));

    // Nothing to analyse: skip the network round-trip entirely.
    if ($text === '') {
        return '';
    }

    // Cap the payload at 2400 bytes. mb_strcut() cuts on a character
    // boundary, so a multi-byte UTF-8 character is never split in half.
    if (strlen($text) > 2400) {
        $text = function_exists('mb_strcut')
            ? mb_strcut($text, 0, 2400, 'UTF-8')
            : substr($text, 0, 2400);
    }

    // The text must be URL-encoded, otherwise spaces, '&', '#' or non-ASCII
    // bytes corrupt the query string.
    $url = 'http://keyword.discuz.com/related_kw.html?title=' . rawurlencode($text) . '&ics=utf-8&ocs=utf-8';

    // Best effort: a failed request yields no keywords instead of an error.
    $data = @file_get_contents($url);
    if ($data === false || $data === '') {
        return '';
    }

    // NOTE(review): assumes each keyword arrives as <kw><![CDATA[...]]></kw>,
    // which is what the original pattern implies - confirm against a live
    // response. Non-greedy matching keeps several <kw> elements on one line
    // from being merged into a single match.
    if (!preg_match_all('~<kw>\s*<!\[CDATA\[(.*?)\]\]>\s*</kw>~s', $data, $out, PREG_SET_ORDER)) {
        return '';
    }

    $key = '';
    // Only the first five matches are used; fewer matches are fine.
    foreach (array_slice($out, 0, 5) as $match) {
        $word = trim($match[1]);
        if ($word !== '') {
            $key .= $word . ',';
        }
    }
    return $key;
}
直接看PHP参考代码吧2:
===========================================
// Automatic keyword / tag extraction: start
// Sends the posted title plus an excerpt of the posted body to the Discuz
// keyword service and stores the returned keywords, comma-separated, in
// $_POST['keyboard'] and $_POST['infotags'].
$charset = 'GBK'; // encoding declared to the service and used for the stored keywords
$keywords = ''; // initialised up front so the assignments at the end are defined even when the lookup fails
$subjectenc = isset($_POST['title']) ? $_POST['title'] : '';
// SubSmalltextVal() is defined elsewhere; presumably it returns an excerpt of
// the text limited to 160 units - TODO confirm.
$subjectenc .= SubSmalltextVal(isset($_POST['newstext']) ? $_POST['newstext'] : '', 160); // field the keywords are extracted from
// Build the query string with proper URL-encoding. The original URL also
// lacked the '?' between the path and the first parameter.
$query = http_build_query(array(
    'ics' => $charset,
    'ocs' => $charset,
    'title' => $subjectenc,
    'content' => $subjectenc,
), '', '&');
// Fetch the keywords remotely; best effort - a failed request leaves $keywords empty.
$data = @file_get_contents('http://keyword.discuz.com/related_kw.html?' . $query);
if ($data) {
    // XML handling: start (modify with care)
    $values = array();
    $index = array();
    $parser = xml_parser_create();
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
    xml_parse_into_struct($parser, $data, $values, $index);
    xml_parser_free($parser);
    // XML handling: end (modify with care)
    $kws = array();
    foreach ($values as $valuearray) {
        $isKeywordTag = ($valuearray['tag'] == 'kw' || $valuearray['tag'] == 'ekw');
        // Elements without character data carry no 'value' key.
        if ($isKeywordTag && isset($valuearray['value'])) {
            $kw = trim($valuearray['value']);
            if ($kw === '') {
                continue;
            }
            // Escape exactly once, while the string is still UTF-8 (as the
            // original's UTF-8 -> GBK conversion assumes), then convert to
            // the target charset. Escaping GBK bytes with htmlspecialchars()'s
            // default UTF-8 charset returns an empty string, and the original
            // also escaped twice, producing "&amp;amp;".
            $kws[] = mb_convert_encoding(htmlspecialchars($kw, ENT_QUOTES, 'UTF-8'), $charset, 'UTF-8');
        }
    }
    // Join with commas; no trailing comma to trim afterwards.
    $keywords = implode(',', $kws);
}
$_POST['keyboard'] = $keywords; // keywords field
$_POST['infotags'] = $keywords; // tags field
// Automatic keyword / tag extraction: end
=========================================
ASP 版本可以自己仿照上面的代码修改一下, 很简单的
目前四大门户网站也有类似的功能,你可以把他们的api挖出来,自己加上,呵呵
转自:http://hi.baidu.com/zhaoxinet/item/83ba2dc2dd573442a8ba9447

浙公网安备 33010602011771号