实现coreseek based on sphinx 对drupal和mediawiki的查询,总算基本搞定。
Posted on 2011-01-19 23:26 昕旸灿烂 阅读(447) 评论(0) 收藏 举报
#crontab -e
@hourly /usr/local/coreseek/bin/indexer --quiet --rotate --all
#vi /usr/local/coreseek/etc/csoft.conf
csoft.conf
#
# Minimal Sphinx configuration sample (clean, simple, functional)
#
# Data source for Drupal nodes: one document per node, keyed by nid
# (search.php builds /drupal/node/<id> links from the document ID).
source src_technet_node
{
type = mysql
sql_host = drupal_dbhost
sql_user = drupal_dbusr
sql_pass = drupal_dbpwd
sql_db = drupal_dbname
sql_port = 3306
# Make the MySQL connection return UTF-8 text.
sql_query_pre = SET NAMES utf8
# node_revisions holds one row per revision, so selecting from it alone
# repeats nid for every edited node, and Sphinx document IDs must be unique.
# Joining on node.vid keeps only the current revision of each node.
# NOTE(review): assumes the Drupal 5/6 schema (node.vid = current revision) - confirm.
sql_query = SELECT r.nid, r.timestamp, r.title, r.body FROM node_revisions r INNER JOIN node n ON n.vid = r.vid
sql_attr_timestamp = timestamp
# Lookup used by the command-line search tool; restricted to the same current revision.
sql_query_info = SELECT r.* FROM node_revisions r INNER JOIN node n ON n.vid = r.vid WHERE r.nid=$id
}
# Data source for Drupal comments: one document per comment, keyed by cid.
source src_technet_comment
{
type = mysql
sql_host = drupal_dbhost
sql_user = drupal_dbusr
sql_pass = drupal_dbpwd
sql_db = drupal_dbname
sql_port = 3306
# Make the MySQL connection return UTF-8 text.
sql_query_pre = SET NAMES utf8
# First column (cid) is the document ID; subject and comment are the text columns.
sql_query = SELECT cid, nid, timestamp, subject, comment FROM comments
# nid is kept as an attribute so search.php can link to the parent node
# (it reads attrs/nid from each match).
sql_attr_uint = nid
sql_attr_timestamp = timestamp
# Per-document lookup query; $id is the document ID (cid).
sql_query_info = SELECT * FROM comments WHERE cid=$id
}
# Data source for MediaWiki pages: one document per page, keyed by the
# page's latest revision id (search.php links results via ?oldid=<id>).
source src_technet_wiki
{
type = mysql
sql_host = mediawiki_dbhost
sql_user = mediawiki_dbusr
sql_pass = mediawiki_dbpwd
sql_db = mediawiki_dbname
sql_port = 3306
# Make the MySQL connection return UTF-8 text.
sql_query_pre = SET NAMES utf8
# Only the latest revision of each page is selected; rev_id is the document ID.
sql_query = SELECT rev_id, old_text FROM page, revision, text WHERE rev_id=page_latest AND old_id=rev_text_id
# $id is a revision id (see sql_query), not a page id, so match it
# against page_latest rather than page_id.
sql_query_info = SELECT page_title FROM page WHERE page_latest=$id
}
# Full-text index over Drupal nodes, built from src_technet_node.
# Queried by search.php under the name 'technet_drupal_node'.
index technet_drupal_node
{
source = src_technet_node
# Base path of the index files.
path = /var/data/technet_drupal_node
docinfo = extern
mlock = 0
# No stemming or other word-form processing.
morphology = none
min_word_len = 1
# Text is indexed as stored; HTML markup is not stripped.
html_strip = 0
# Coreseek Chinese word-segmentation settings: mmseg dictionary directory
# and charset. NOTE(review): confirm against the installed Coreseek version.
charset_dictpath = /usr/local/mmseg3/etc/
charset_type = zh_cn.utf-8
ngram_len = 0
}
# Full-text index over Drupal comments, built from src_technet_comment.
# Queried by search.php under the name 'technet_drupal_comment'.
index technet_drupal_comment
{
source = src_technet_comment
# Base path of the index files.
path = /var/data/technet_drupal_comment
docinfo = extern
mlock = 0
# No stemming or other word-form processing.
morphology = none
min_word_len = 1
# Text is indexed as stored; HTML markup is not stripped.
html_strip = 0
# Coreseek Chinese word-segmentation settings: mmseg dictionary directory
# and charset. NOTE(review): confirm against the installed Coreseek version.
charset_dictpath = /usr/local/mmseg3/etc/
charset_type = zh_cn.utf-8
ngram_len = 0
}
# Full-text index over MediaWiki page text, built from src_technet_wiki.
# Queried by search.php under the name 'technet_wiki'.
index technet_wiki
{
source = src_technet_wiki
# Base path of the index files.
path = /var/data/technet_wiki
docinfo = extern
mlock = 0
# No stemming or other word-form processing.
morphology = none
min_word_len = 1
# Text is indexed as stored; wiki/HTML markup is not stripped.
html_strip = 0
# Coreseek Chinese word-segmentation settings: mmseg dictionary directory
# and charset. NOTE(review): confirm against the installed Coreseek version.
charset_dictpath = /usr/local/mmseg3/etc/
charset_type = zh_cn.utf-8
ngram_len = 0
}
# Settings for the indexer program (run hourly from cron with --rotate --all).
indexer
{
# Memory limit for an indexing run.
mem_limit = 256M
}
# Settings for the searchd daemon that search.php connects to.
searchd
{
# Must match the port passed to SetServer() in search.php.
port = 3312
log = /var/log/technet-search.log
query_log = /var/log/technet-query.log
read_timeout = 5
max_children = 30
# NOTE(review): the pid file lives under /var/log next to the logs - confirm this is intended.
pid_file = /var/log/technet-searchd.pid
max_matches = 1000
# Rotation-related settings; the cron job runs the indexer with --rotate.
seamless_rotate = 1
preopen_indexes = 0
unlink_old = 1
}
# vi search.php
功能实现,还需要再加工咯!
search.php
<!-- Search page shell: declares UTF-8 and posts the query field "q" back to search.php. -->
<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8" /><title>search for r&amp;d</title></head><body>
<form action="search.php" method="post">
<input type="text" name="q" size="40"/>
<input type="submit">
</form>
<?php
// Run a search only when the form was submitted with a plain string in "q".
// The is_string() check rejects array input such as q[]=x, which would
// otherwise be handed straight to the Sphinx client.
if (isset($_POST["q"]) && is_string($_POST["q"])) {
    search($_POST["q"]);
}
/**
 * Run a phrase search for $q against the three Sphinx indexes and print
 * the hits as an HTML list of links, followed by any client errors/warnings.
 *
 * @param string $q Raw search phrase from the form; it is HTML-escaped
 *                  before being echoed back.
 */
function search($q) {
    // require_once so that a second call does not redeclare SphinxClient.
    require_once "sphinxapi.php";

    $cl = new SphinxClient();
    $cl->SetServer('168.2.4.246', 3312);
    $cl->SetMatchMode(SPH_MATCH_PHRASE);

    $indexes = array(
        'node'    => 'technet_drupal_node',
        'comment' => 'technet_drupal_comment',
        'wiki'    => 'technet_wiki',
    );

    $matches = array();
    $messages = array();
    foreach ($indexes as $kind => $index) {
        $res = $cl->Query($q, $index);
        // Query() returns false on failure, and the 'matches' key may be
        // absent when nothing was found; treat both as "no hits".
        $matches[$kind] = (is_array($res) && isset($res['matches'])) ? $res['matches'] : array();
        // Collect messages per query: the client only exposes the most
        // recent error/warning, so reading them after the loop would lose
        // failures from the earlier indexes.
        foreach (array($cl->GetLastError(), $cl->GetLastWarning()) as $message) {
            if ($message != '') {
                $messages[] = $index . ': ' . $message;
            }
        }
    }

    $n_node = count($matches['node']);
    $n_comment = count($matches['comment']);
    $n_wiki = count($matches['wiki']);

    // $q is user input: escape it before writing it into the page.
    printf(
        "查询:[ %s ],共找到 %d(%d/%d/%d) 条结果。 <hr/>",
        htmlspecialchars($q, ENT_QUOTES, 'UTF-8'),
        $n_node + $n_comment + $n_wiki,
        $n_node,
        $n_comment,
        $n_wiki
    );

    echo "<ul>";
    foreach ($matches['node'] as $id => $value) {
        printf("<li>node: <a href=\"http://technet.baosight.com/drupal/node/%d\">http://technet.baosight.com/drupal/node/%d</a></li>", $id, $id);
    }
    foreach ($matches['comment'] as $id => $value) {
        // The comment's parent node id is carried as the 'nid' attribute.
        $nid = isset($value['attrs']['nid']) ? $value['attrs']['nid'] : 0;
        printf("<li>comment: <a href=\"http://technet.baosight.com/drupal/node/%d/#comment-%d\">http://technet.baosight.com/drupal/node/%d/#comment-%d</a></li>", $nid, $id, $nid, $id);
    }
    foreach ($matches['wiki'] as $id => $value) {
        printf("<li>wiki: <a href=\"http://technet.baosight.com/mediawiki/index.php?oldid=%d\">http://technet.baosight.com/mediawiki/index.php?oldid=%d</a></li>", $id, $id);
    }
    echo "</ul>";

    echo '<hr /><pre>';
    echo htmlspecialchars(implode("\n", $messages), ENT_QUOTES, 'UTF-8');
    echo '</pre>';
}
?>
</body></html>

浙公网安备 33010602011771号