【转】全文检索引擎Sphinx配置文件详细介绍

原文地址：http://blog.sina.com.cn/s/blog_6c971aa301012yfb.html

## Sphinx configuration file sample
# WARNING! While this sample file mentions all available options,
# it contains (very) short helper descriptions only. Please refer to
# doc/sphinx.html for details.
#
#警告！虽然这个配置文件列出了所有的可用选项，但是它只包含非常简短的帮助描述，请参考doc文件夹下的sphinx.html获得更加详细的帮助

#############################################################################
## data source definition 数据源定义
#数据源就是数据从哪儿来，只有知道了数据源，我们才能够清楚需要检索的是什么。
#
# LAMP兄弟连提示您他的语法格式为：
#
# source 名字{
# 选项
#
# }
#############################################################################

source src1
{
# data source type. mandatory, no default value
# 数据库类，强制性的，没有默认值
# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
# 已知的类型包括mysql,pgsql,mssql,xmlpipe,xmlpipe2,odbc
type = mysql

#####################################################################
## SQL settings (for 'mysql' and 'pgsql' types)
# 以下SQL设置专为mysql,pgsql类型提供支持
#####################################################################

# some straightforward parameters for SQL source types
# 一些简单的数据源类型参数
sql_host    = localhost
#sql主机
sql_user    = test
#sql用户
sql_pass    = password
#sql密码
sql_db      = test
#sql数据库
sql_port    = 3306 # optional, default is 3306
#sql数据库端口，mysql的默认值为3306,可以进行修改

# UNIX socket name
# UNIX socket名称
# optional, default is empty (reuse client library defaults)
# 可选，默认为空(重用默认客户端库默认)
# usually '/var/lib/mysql/mysql.sock' on Linux
# linux服务器上通常在以下路径'/var/lib/mysql/mysql.sock'，根据具体情况不同而进行设置不同
# usually '/tmp/mysql.sock' on FreeBSD
# FreeBSD操作系统通常在/tmp/mysql.sock路径下
#
# sql_sock = /tmp/mysql.sock
# 在此处选择设置sock路径

# MySQL specific client connection flags
# mysql 具体的客户端连接标识
# optional, default is 0
# 可选，默认置为0
#
# mysql_connect_flags = 32 # enable compression
# 启用压缩

# MySQL specific SSL certificate settings
# mysql 具体的 SSL（安全套接层）证书设置
# optional, defaults are empty
# 可选项，默认值为空
#
# mysql_ssl_cert  = /etc/ssl/client-cert.pem
# mysql ssl证书路径
#
# mysql_ssl_key  = /etc/ssl/client-key.pem
# 证书key
# mysql_ssl_ca  = /etc/ssl/cacert.pem
# 证书ca(何为CA：CA是证书的签发机构,它是PKI的核心。CA是负责签发证书、认证证书、管理已颁发证书的机关。它要制定政策和具体步骤来验证、识别用户身份，并对用户证书进行签名，以确保证书持有者的身份和公钥的拥有权。)

# MS SQL specific Windows authentication mode flag
# MSSQL 具体的身份验证模式标识
# MUST be in sync with charset_type index-level setting
# 必须与索引级别的charset_type设置保持一致（同步）
# optional, default is 0
# 可选的，默认值为0
#
# mssql_winauth = 1 # use currently logged on user credentials
# 使用当前用户登陆凭据

# MS SQL specific Unicode indexing flag
# MS SQL 具体的unicode 索引标识
# optional, default is 0 (request SBCS data)
# 可选的，默认值为0 (请求SBCS单字节字符集数据)
# mssql_unicode = 1 # request Unicode data from server
# 向服务器请求Unicode数据

# ODBC specific DSN (data source name)
# ODBC 具体的DSN(注：何为DSN？DSN为数据源名)
# mandatory for odbc source type, no default value
# 以下部份设置DSN，根据不同情况进行不同设置
# odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
# sql_query = SELECT id, data FROM documents.csv

# pre-query, executed before the main fetch query
# 预查询，在主取数查询（sql_query）之前执行
# multi-value, optional, default is empty list of queries
# 多值，可选，默认值为空的查询列表
# sql_query_pre = SET NAMES utf8
# 预查询设置字符集utf8，切记不要加中横线
# sql_query_pre = SET SESSION query_cache_type=OFF
# 设置查询的SESSION query_cache_type为关闭状态

# main document fetch query
# mandatory, integer document ID field MUST be the first selected column
# 主文档取数查询；必选项，整型的文档ID字段必须是SELECT的第一列
sql_query    = \
  SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
  FROM documents

# range query setup, query that must return min and max ID values
# 查询范围设置，查询必须设置一个返回的最大值和最小值
# optional, default is empty
# 可选项，默认值为空
#
# sql_query will need to reference $start and $end boundaries
# SQL_QUERY 需要引用一个$start 和$end 边界
# if using ranged query:
#
# sql_query = \
# SELECT doc.id, doc.id AS group, doc.title, doc.data \
# FROM documents doc \
# WHERE id>=$start AND id<=$end
#
# sql_query_range = SELECT MIN(id),MAX(id) FROM documents
# 范围查询：从documents表中取得ID的最小值和最大值。此处的documents表是指样例example.sql中创建的数据表

# range query step
# 范围查询步长
# optional, default is 1024
# 可选项，默认值为1024
#
# sql_range_step = 1000

# unsigned integer attribute declaration
# 无符号整型声明
# multi-value (an arbitrary number of attributes is allowed), optional
# 多值，准许任意数量的属性
# optional bit size can be specified, default is 32
# 可选的位大小是可以被指定的，默认为32
#
# sql_attr_uint   = author_id
# sql_attr_uint   = forum_id:9 # 9 bits for forum_id 存储9位给forum_id这个字段
sql_attr_uint   = group_id

# boolean attribute declaration
# 布尔属性声明
# multi-value (an arbitrary number of attributes is allowed), optional
# 多值（准许任意数量的属性），可选
# equivalent to sql_attr_uint with 1-bit size
# 相当于位长为1的 sql_attr_uint
# sql_attr_bool = is_deleted

# bigint attribute declaration
# 大整型属性声明
# multi-value (an arbitrary number of attributes is allowed), optional
# 多值（准许任意数量的属性），可选
# declares a signed (unlike uint!) 64-bit attribute
# 声明一个有符号的（与uint不同！）64位属性
# sql_attr_bigint = my_bigint_id

# UNIX timestamp attribute declaration
# unix时间戳声明
# multi-value (an arbitrary number of attributes is allowed), optional
# 多值（准许任意数量的属性），可选
# similar to integer, but can also be used in date functions
# 类似的整数，但也可以使用日期函数
# sql_attr_timestamp = posted_ts
# sql_attr_timestamp = last_edited_ts
sql_attr_timestamp = date_added

# string ordinal attribute declaration
# 字符串属性声明
# multi-value (an arbitrary number of attributes is allowed), optional
# 多值（准许任意数量的属性），可选
# sorts strings (bytewise), and stores their indexes in the sorted list
# 对字符串按字节序排序，并存储它们在排序后列表中的序号
# sorting by this attr is equivalent to sorting by the original strings
# 按这个属性排序等效于按原始字符串排序
# sql_attr_str2ordinal = author_name

# floating point attribute declaration
# 浮点属性声明
# multi-value (an arbitrary number of attributes is allowed), optional
# 多值（准许任意数量的属性），可选
# values are stored in single precision, 32-bit IEEE 754 format
# 值存储于单精度中，32位长度，属于IEEE 754格式
# sql_attr_float = lat_radians
# sql_attr_float = long_radians

# multi-valued attribute (MVA) attribute declaration
# 多值属性声明
# multi-value (an arbitrary number of attributes is allowed), optional
# 多值（准许任意数量的属性），可选
# MVA values are variable length lists of unsigned 32-bit integers
# 多值属性值是变长的无符号32位整型
# syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
# 语法格式是 ATTR-TYPE ATTR-NAME from 源类型 [;QUERY][;RANGE-QUERY]
# ATTR-TYPE is 'uint' or 'timestamp'
# 属性类型是uint或者时间戳
# SOURCE-TYPE is 'field', 'query', or 'ranged-query'
# 源类型是字段，query或者是ranged-query
# QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
# QUERY是用于获取全部 (docid, 属性值) 对的SQL查询
# RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
# RANGE-QUERY（区间查询）是用作SQL查询时获取最小ID和最大ID值地，类似于'sql_query_range'
# sql_attr_multi = uint tag from query; SELECT id, tag FROM tags
# sql_attr_multi = uint tag from ranged-query; \
# SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
# SELECT MIN(id), MAX(id) FROM tags

# post-query, executed on sql_query completion
# 后置查询，在sql_query执行完成后执行
# optional, default is empty
# 可选项，默认值为空
# sql_query_post =

# post-index-query, executed on successful indexing completion
#
# post-index-query,执行成功后索引
# optional, default is empty
# 可选项，默认值为空
# $maxid expands to max document ID actually fetched from DB
# $maxid 扩展至最大的文档ID 实际上取出来自数据库
# sql_query_post_index = REPLACE INTO counters ( id, val ) \
# VALUES ( 'max_indexed_id', $maxid )

# ranged query throttling, in milliseconds
# 查询范围限制，以毫秒为单位
# optional, default is 0 which means no delay
# 可选，默认为0，这意味着没有延误
# enforces given delay before each query step
# 每个查询执行前给予延迟
sql_ranged_throttle = 0

# document info query, ONLY for CLI search (ie. testing and debugging)
# 文档信息查询，只为CLI的搜索
# optional, default is empty
# 可选，默认值为空
# must contain $id macro and must fetch the document by that id
# 必须包含ID 宏并且必须通过这个ID读取文档
sql_query_info = SELECT * FROM documents WHERE id=$id

# kill-list query, fetches the document IDs for kill-list
# kill-list 查询，查询这些文档的ID用作kill列表
# kill-list will suppress matches from preceding indexes in the same query
# kill-list 会屏蔽同一查询中排在前面的索引所产生的匹配结果
# optional, default is empty
# 可选项，默认值为空
# sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex

# columns to unpack on indexer side when indexing
# 建立索引时需要在indexer端解压的字段
# multi-value, optional, default is empty list
# 多值，可选项，默认值为空列表
# unpack_zlib = zlib_column
# unpack_mysqlcompress = compressed_column
# unpack_mysqlcompress = compressed_column_2

# maximum unpacked length allowed in MySQL COMPRESS() unpacker
# 最大解压长度准许mysql COMPRESS() 解压
# optional, default is 16M
# 可选项，默认值为16M
# unpack_mysqlcompress_maxsize = 16M

#####################################################################
## xmlpipe settings
## xmlpipe 设置
#####################################################################

# type = xmlpipe
#
# 类型=xmlpipe

# shell command to invoke xmlpipe stream producer
# 用于调用xmlpipe流生成程序的shell命令
# mandatory
# 强制
# xmlpipe_command = cat @CONFDIR@/test.xml

#####################################################################
## xmlpipe2 settings
## xmlpipe2 设置
#####################################################################

# type = xmlpipe2
# xmlpipe_command = cat @CONFDIR@/test2.xml

# xmlpipe2 field declaration
# xmlpipe2 字段定义
# multi-value, optional, default is empty
# 多值，可选，默认值为空
#
# xmlpipe_field = subject
# xmlpipe_field = content

# xmlpipe2 attribute declaration
# xmlpipe2 属性定义
# multi-value, optional, default is empty
# 多值，可选项，默认值为空
# all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
# 所有xmlpipe_attr_XXX选项是完全类似的sql_attr_XXX
# xmlpipe_attr_timestamp = published
# xmlpipe_attr_uint = author_id

# perform UTF-8 validation, and filter out incorrect codes
# 执行UTF-8验证，并过滤掉不正确的编码
# avoids XML parser choking on non-UTF-8 documents
# 避免XML解析器在处理非UTF-8文档时出错中断
# optional, default is 0
# 可选项，默认值为0
# xmlpipe_fixup_utf8 = 1
}

# inherited source example
# 继承源样例
# all the parameters are copied from the parent source,
# 所有的参数都是复制来源于父级源，
# and may then be overridden in this source definition
# 并且可能覆盖这个源定义
source src1throttled : src1
{
# the only override: delay (in milliseconds) enforced before each ranged
# query step; the parent source src1 sets this to 0, i.e. no delay
sql_ranged_throttle = 100
}

#############################################################################
## index definition
## 索引定义
#############################################################################

# local index example
# 本机索引样例
#
# this is an index which is stored locally in the filesystem
# 这是一个索引，它存储于本地文件系统
# all indexing-time options (such as morphology and charsets)
# 所有建立索引时的选项（如词形处理和字符集）
# are configured per local index
# 都按每个本地索引单独配置
index test1
{
# document source(s) to index
# 文档源去索引
# multi-value, mandatory
# 多值，强制
# document IDs must be globally unique across all sources
# 文档的所有ID必须是全局的唯一的跨越所有来源
source = src1

# index files path and file name, without extension
# 索引文件路径和文件名，无扩展
# mandatory, path must be writable, extensions will be auto-appended
# 强制，路径必须是可写的，扩展会自动产生
path = @CONFDIR@/data/test1

# document attribute values (docinfo) storage mode
# 文档属性值（文档信息）存储模式
# optional, default is 'extern'
# 可选，默认值为'extern'
# known values are 'none', 'extern' and 'inline'
# 已知的值为'none','extern'和'inline'
docinfo = extern

# memory locking for cached data (.spa and .spi), to prevent swapping
# 内存锁定为缓存（.spa和.spi）,以防止交换
# optional, default is 0 (do not mlock)
# 可选项，默认值为0 （不进行内存锁）
# requires searchd to be run from root
mlock = 0

# a list of morphology preprocessors to apply
# 要应用的词形（morphology）预处理器列表
# optional, default is empty
#可选，默认值为空
# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
# 内置预处理器是'none','stem_en','stem_ru','stem_enru','soundex'和'metaphone';来自libstemmer的额外预处理器为'libstemmer_XXX',其中XXX是算法代码
# 'soundex', and 'metaphone'; additional preprocessors available from
# libstemmer are 'libstemmer_XXX', where XXX is algorithm code
# (see libstemmer_c/libstemmer/modules.txt) 查看libstemmer_c/libstemmer/modules.txt
#
# morphology = stem_en, stem_ru, soundex
# morphology = libstemmer_german
# morphology = libstemmer_sv
morphology = none

# minimum word length at which to enable stemming
# 启用词干提取（stemming）的最小词长
# optional, default is 1 (stem everything)
# 可选项，默认值为1（对所有词进行词干提取）
# min_stemming_len = 1

# stopword files list (space separated)
# 非索引字的文件列表（用空格隔开）
# optional, default is empty
# 可选项，默认值为空
# contents are plain text, charset_table and stemming are both applied
# 内容为纯文本，charset_table和词干提取（stemming）都会应用于其上
# stopwords = @CONFDIR@/data/stopwords.txt

# wordforms file, in "mapfrom > mapto" plain text format
# wordforms 文件“mapfrom> mapto”，纯文本格式
# optional, default is empty
# 可选，默认值为空
#
# wordforms = @CONFDIR@/data/wordforms.txt

# tokenizing exceptions file
# tokenizing例外文件
# optional, default is empty
# 可选项，默认值为空
# plain text, case sensitive, space insensitive in map-from part
# 纯文本，大小写敏感，map-from部分对空格不敏感
# one "Map Several Words => ToASingleOne" entry per line
#
# exceptions = @CONFDIR@/data/exceptions.txt

# minimum indexed word length
# 最低索引字长
# default is 1 (index everything)
# 默认为1（所有的索引）
min_word_len = 1

# charset encoding type
# 字符集编码类型
# optional, default is 'sbcs'
#可选项，默认值为sbcs
# known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
# 可选的类型为sbcs和utf-8
charset_type = utf-8

# charset definition and case folding rules "table"
# 字符集的定义和大小写折叠（case folding）规则“表”
# optional, default value depends on charset_type
# 可选项，默认值取决于charset_type
# defaults are configured to include English and Russian characters only
# 默认配置只包括英文和俄文字符
# you need to change the table to include additional ones
# 您需要更改的表包含附加字符集
# this behavior MAY change in future versions
# 这种行为可能会更改在将来的版本中
#
# 'sbcs' default value is
# sbcs默认值是
# charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
# utf8的默认值为
# 'utf-8' default value is
charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F

# ignored characters list
# 忽略字符列表
# optional, default value is empty
# 可选项，默认值为空
# ignore_chars = U+00AD

# minimum word prefix length to index
# 最小单词前缀长度索引
# optional, default is 0 (do not index prefixes)
# 可选项，默认值为0（不包含索引前缀）
# min_prefix_len = 0

# minimum word infix length to index
# 最小字缀长度索引
# optional, default is 0 (do not index infixes)
# 可选项，默认值为0（不索引中缀）
# min_infix_len = 0

# list of fields to limit prefix/infix indexing to
# 限定只对哪些字段做前缀/中缀索引的字段列表
# optional, default value is empty (index all fields in prefix/infix mode)
# 可选项，默认值为空（索引的所有字段，前缀/中缀模式）
# prefix_fields = filename
# infix_fields = url, domain

# enable star-syntax (wildcards) when searching prefix/infix indexes
# 启动star-syntax(通配符)当搜索时前缀、中缀索引
# known values are 0 and 1
# 可选值为0和1
# optional, default is 0 (do not use wildcard syntax)
# 可选项，默认值为0(不要使用通配符语法)
# enable_star = 1

# n-gram length to index, for CJK indexing
# n-gram 长度去索引，为了CJK编码索引
# only supports 0 and 1 for now, other lengths to be implemented
# 现在只支持0和1，其他长度尚待实现
# optional, default is 0 (disable n-grams)
# 可选项，默认值为0 关闭n-grams
# ngram_len = 1

# n-gram characters list, for CJK indexing
# n-gram字符集列表，为CJK字符集索引
# optional, default is empty
# 可选项，默认值为空
# ngram_chars = U+3000..U+2FA1F

# phrase boundary characters list
# 短语边界字符列表
# optional, default is empty
# 可选项，默认值为空
# phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis

# phrase boundary word position increment
# 短语边界位置增量字
# optional, default is 0
# 可选项，默认值为0
# phrase_boundary_step = 100

# whether to strip HTML tags from incoming documents
# 是否过滤HTML标签
# known values are 0 (do not strip) and 1 (do strip)
# 已知的值是0（不过虑）和1(过滤)
# optional, default is 0
# 可选项，默认值为0
html_strip = 0

# what HTML attributes to index if stripping HTML
# 过滤HTML标签时，哪些HTML属性的内容仍然需要被索引
# optional, default is empty (do not index anything)
# 可选项，默认值为空（不去索引任何项）
# html_index_attrs = img=alt,title; a=title;

# what HTML elements contents to strip
# 哪些HTML元素索引的时候需要被过滤
# optional, default is empty (do not strip element contents)
#可选项，默认为空（不过虑这些元素内容）
# html_remove_elements = style, script

# whether to preopen index data files on startup
# 在启动时是否预开索引数据文件
# optional, default is 0 (do not preopen), searchd-only
# 可选项，默认值为0(不预开)
# preopen = 1

# whether to keep dictionary (.spi) on disk, or cache it in RAM
# 是否保留字典（.SPI）在磁盘上，或者是缓存在内存上
# optional, default is 0 (cache in RAM), searchd-only
# 可选项，默认值为0（缓存于内存）
# ondisk_dict = 1

# whether to enable in-place inversion (2x less disk, 90-95% speed)
# 是否启用就地转化(2倍更少的磁盘，90-95％的速度)
# optional, default is 0 (use separate temporary files), indexer-only
# 可选项，默认值为0（使用单独的临时文件）
# inplace_enable = 1

# in-place fine-tuning options
# 就地微调选项
# optional, defaults are listed below
# 可选项，下面列出了默认项
# inplace_hit_gap = 0 # preallocated hitlist gap size 预分配的hitlist间隙大小
# inplace_docinfo_gap = 0 # preallocated docinfo gap size 预分配的docinfo间隙大小
# inplace_reloc_factor = 0.1 # relocation buffer size within arena 内存区（arena）内重定位缓冲区的大小
# inplace_write_factor = 0.1 # write buffer size within arena 内存区（arena）内写缓冲区的大小

# whether to index original keywords along with stemmed versions
# 是否在索引词干化形式的同时也索引原始关键字
# enables "=exactform" operator to work 使"=exactform"运算符可以工作
#
# optional, default is 0
# 可选项，默认值为0
# index_exact_words = 1

# position increment on overshort (less than min_word_len) words
# 过短词（长度小于min_word_len的词）处的位置增量
# optional, allowed values are 0 and 1, default is 1
# 可选项，准许值有0和1，默认值为1
# overshort_step = 1

# position increment on stopword
# 停用词（stopword）处的位置增量
# optional, allowed values are 0 and 1, default is 1
# 可选项，准许值有0和1，默认值为1
# stopword_step = 1
}

# inherited index example
# 继承索引样例
# all the parameters are copied from the parent index,
# 所有的参数都是从父复制索引，
# and may then be overridden in this index definition
# 然后，可以在该索引覆盖的定义
index test1stemmed : test1
{
# own index files path (the parent index test1 uses @CONFDIR@/data/test1)
path = @CONFDIR@/data/test1stemmed
# the parent sets morphology = none; this child applies the builtin
# 'stem_en' preprocessor instead
morphology = stem_en
}

# distributed index example
# 分布式索引的例子
# this is a virtual index which can NOT be directly indexed,
# 这是一个虚拟索引，不能直接对其建立索引
# and only contains references to other local and/or remote indexes
# 并且只包含其他本地及/或远程索引引用
index dist1
{
# 'distributed' index type MUST be specified
# 必须指定索引类型为'distributed'（分布式）
type = distributed

# local index to be searched
# 本地索引进行搜索
# there can be many local indexes configured
# 可以有很多配置本地索引
local = test1
local = test1stemmed

# remote agent
# 远程代理
# multiple remote agents may be specified
# 多个远程代理可以指定
# syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'
# TCP连接的语法'hostname:port:index1,[index2[,...]]'
# syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'
# 语法本地UNIX连接 '/path/to/socket:index1,[index2[,...]]'
agent    = localhost:9313:remote1
agent    = localhost:9314:remote2,remote3
# agent    = /var/run/searchd.sock:remote4
        #代理
# blackhole remote agent, for debugging/testing
# 黑洞远程代理，调试/测试
# network errors and search results will be ignored
# 网络错误和搜索结果将被忽略
# agent_blackhole  = testbox:9312:testindex1,testindex2

# remote agent connection timeout, milliseconds
# 远程代理连接超时，毫秒
# optional, default is 1000 ms, ie. 1 sec
# 可选项，默认值为1000ms,1秒
agent_connect_timeout = 1000

# remote agent query timeout, milliseconds
# 远程代理查询超时，毫秒
# optional, default is 3000 ms, ie. 3 sec
# 可选项，默认值为3000毫秒相当于3秒
agent_query_timeout = 3000
}

#############################################################################
## indexer settings
## 索引器设置
#############################################################################

indexer
{
# memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
# 内存限制，单位可以是字节、千字节（如16384K）或兆字节（如256M）
# optional, default is 32M, max is 2047M, recommended is 256M to 1024M
# 可选项，默认值为32M,最大值为2047M,推荐的是256M到1024M
mem_limit = 32M

# maximum IO calls per second (for I/O throttling)
# 最高每秒的IO调用
# optional, default is 0 (unlimited)
# 可选项，默认值为0 不限制
# max_iops = 40

# maximum IO call size, bytes (for I/O throttling)
# 最大IO调用大小，字节（用于I / O限制）
# optional, default is 0 (unlimited)
# 可选项，默认值为0不限制
# max_iosize = 1048576

# maximum xmlpipe2 field length, bytes
# 最大xmlpipe2字段长度，字节
# optional, default is 2M
# 可选项，默认值为2M
# max_xmlpipe2_field = 4M

# write buffer size, bytes
# 写入缓冲区大小，字节
# several (currently up to 4) buffers will be allocated
# 几个（目前最多4个）的缓冲区将被分配
# write buffers are allocated in addition to mem_limit
# 写缓冲区分配除了mem_limit
# optional, default is 1M
# 可选项，默认值为1M
# write_buffer = 1M
}

#############################################################################
## searchd settings
## searchd 设置
#
#############################################################################

searchd
{
# hostname, port, or hostname:port, or /unix/socket/path to listen on
# 要监听的主机名、端口、主机名:端口，或者UNIX套接字路径（/unix/socket/path）；多值，允许配置多个监听点
# multi-value, multiple listen points are allowed
# optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312)
# 可选项，默认值为0.0.0.0:9312(监听所有的接口，端口为9312)
# listen    = 127.0.0.1
# listen    = 192.168.0.1:9312
# listen    = 9312
# listen    = /var/run/searchd.sock

# log file, searchd run info is logged here
# 日志文件，searchd的运行信息记录在这里
# optional, default is 'searchd.log'
log = @CONFDIR@/log/searchd.log

# query log file, all search queries are logged here
# 查询日志文件，所有的搜索记录全部在记录在这儿
# optional, default is empty (do not log queries)
# 可选项，默认值为空（不记录查询日志）
query_log = @CONFDIR@/log/query.log

# client read timeout, seconds
# 客户端读取超时时间，秒为单位
# optional, default is 5
# 可选项，默认值为5
read_timeout = 5

# request timeout, seconds
# 请求超时时间，单位为秒
# optional, default is 5 minutes
# 可选项，默认值为5分钟
client_timeout = 300

# maximum amount of children to fork (concurrent searches to run)
#
# optional, default is 0 (unlimited)
# 可选项，默认值为0(无限)
max_children = 30

# PID file, searchd process ID file name
# PID文件，即searchd进程ID文件名
# mandatory
# 必选
pid_file = @CONFDIR@/log/searchd.pid

# max amount of matches the daemon ever keeps in RAM, per-index
# WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
# default is 1000 (just like Google)
max_matches = 1000

# seamless rotate, prevents rotate stalls if precaching huge datasets
# optional, default is 1
# 可选项，默认值为1
seamless_rotate = 1

# whether to forcibly preopen all indexes on startup
# 是否在启动时强行预开所有的索引
# optional, default is 0 (do not preopen)
# 可选项，默认值为0（不进行预开）
preopen_indexes = 0

# whether to unlink .old index copies on successful rotation.
# 索引轮换（rotate）成功后，是否删除.old旧索引副本。
# optional, default is 1 (do unlink)
# 可选项，默认值为1（删除）
unlink_old = 1

# attribute updates periodic flush timeout, seconds
# updates will be automatically dumped to disk this frequently
# optional, default is 0 (disable periodic flush)
#
# attr_flush_period = 900

# instance-wide ondisk_dict defaults (per-index value take precedence)
# optional, default is 0 (precache all dictionaries in RAM)
#
# ondisk_dict_default = 1

# MVA updates pool size
# MVA 更新池的大小
# shared between all instances of searchd, disables attr flushes!
# 在searchd的所有实例之间共享，并且会禁用属性刷新（attr flushes）！
# optional, default size is 1M
# 可选项，默认值为1M
mva_updates_pool = 1M

# max allowed network packet size
# 最大准许的网络包大小
# limits both query packets from clients, and responses from agents
# 限制查询包大小来自客户端和响应代理
# optional, default size is 8M
# 可选项，默认值为8M
max_packet_size = 8M

# crash log path
# 崩溃日志路径
# searchd will (try to) log crashed query to 'crash_log_path.PID' file
# searchd 将（尝试）把导致崩溃的查询记录到'crash_log_path.PID'文件
# optional, default is empty (do not create crash logs)
# 可选项，默认值为空。（不记录崩溃日志）
# crash_log_path = @CONFDIR@/log/crash

# max allowed per-query filter count
# 最大允许每个查询过滤器计数
# optional, default is 256
# 可选项，默认值为256
max_filters = 256

# max allowed per-filter values count
# 最大允许每个过滤值的计数
# optional, default is 4096
# 可选项，默认值为4096
max_filter_values = 4096

# socket listen queue length
# socket监听队列的长度
# optional, default is 5
# 可选项，默认值为5
# listen_backlog = 5

# per-keyword read buffer size
# 每个关键字读取缓冲区的大小
# optional, default is 256K
# 可选项，默认值为256K
# read_buffer = 256K

# unhinted read size (currently used when reading hits)
# unhinted读取量（目前用于读取hits命中列表时）
# optional, default is 32K
# 可选项，默认值为32K
# read_unhinted = 32K
}

# --eof--
# 结束

以下是其简单的一个应用:

<?php
// Minimal SphinxClient usage example: sends one query to searchd and
// prints either the failure reason or a summary of the matches found.
require ( "../sys/lib/sphinxapi.php" );

// Query text and connection settings.
$q     = '"东方新闻"';
$host  = '192.168.1.100';
$port  = 9312;
$mode  = SPH_MATCH_EXTENDED;
$index = '*';

// Configure the client before issuing the query.
$client = new SphinxClient ();
$client->SetServer ( $host, $port );
$client->SetConnectTimeout ( 1 );
$client->SetArrayResult ( true );
$client->SetMatchMode ( $mode );

$res = $client->Query ( $q, $index );

if ( $res!==false )
{
    // A warning, if any, is reported ahead of the regular output.
    if ( $client->GetLastWarning() )
    {
        print "WARNING: " . $client->GetLastWarning() . "\n\n";
    }

    print "Query '$q' retrieved $res[total] of $res[total_found] matches in $res[time] sec.\n";
    print "Query stats:\n";

    // Per-keyword statistics are printed only when the result carries them.
    if ( isset($res['words']) && is_array($res["words"]) )
    {
        foreach ( $res["words"] as $word => $info )
        {
            print "    '$word' found $info[hits] times in $info[docs] documents\n";
        }
    }

    print "\n";
}
else
{
    print "Query failed: " . $client->GetLastError() . ".\n";
}
?>

参考阅读：

Sphinx配置文件详细介绍

http://lovealwaysonline.blog.163.com/blog/static/197692011201261485729837/

数据源配置：mysql数据源

http://www.coreseek.cn/products-install/mysql/

BSD/Linux下的安装测试

http://www.coreseek.cn/products-install/install_on_bsd_linux/

posted @ 2013-07-23 16:59 骑猪南下阅读(459) 评论(0) 收藏举报

刷新页面返回顶部

骑猪南下

做一只勤奋的猪

【转】全文检索引擎Sphinx配置文件详细介绍

公告