【转】全文检索引擎Sphinx配置文件详细介绍

原文地址:http://blog.sina.com.cn/s/blog_6c971aa301012yfb.html

## Sphinx configuration file sample
# WARNING! While this sample file mentions all available options,
# it contains (very) short helper descriptions only. Please refer to
# doc/sphinx.html for details.
#
#警告!虽然这个配置文件列出了所有的可用选项,但是它只包含非常简短的帮助描述和简介,请参考doc文件夹下的sphinx.html获得更加详细的帮助

#############################################################################
## data source definition   数据源定义
#数据源就是数据从哪儿来,只有知道了数据源,我们才能清楚需要检索的是什么
#
#  LAMP兄弟连提示您他的语法格式为:
#
#  source 名字{
#     选项
#
#  }
#############################################################################

source src1
{
 # data source type. mandatory, no default value
 # 数据库类,强制性的,没有默认值
 # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
 # 已知的类型包括mysql,pgsql,mssql,xmlpipe,xmlpipe2,odbc
 type     = mysql

 #####################################################################
 ## SQL settings (for 'mysql' and 'pgsql' types)
 #  以下SQL设置专为mysql,pgsql类型提供支持
 #####################################################################

 # some straightforward parameters for SQL source types
 # 一些简单的数据源类型参数
 sql_host    = localhost
 #sql主机
 sql_user    = test
 #sql用户
 sql_pass    = password
 #sql密码
 sql_db      = test
 #sql数据库
 sql_port    = 3306 # optional, default is 3306
 #sql数据库端口,mysql的默认值为3306,可以进行修改

 # UNIX socket name
 # unix socket名称
 # optional, default is empty (reuse client library defaults)
 # 可选,默认为空(重用默认客户端库默认)
 # usually '/var/lib/mysql/mysql.sock' on Linux
 # linux服务器上通常在以下路径'/var/lib/mysql/mysql.sock',根据具体情况不同而进行设置不同
 # usually '/tmp/mysql.sock' on FreeBSD
 # FreeBSD操作系统通常在/tmp/mysql.sock路径下
 #
 # sql_sock    = /tmp/mysql.sock
 # 在此处选择设置sock路径


 # MySQL specific client connection flags
 # mysql 具体的客户端连接标识
 # optional, default is 0
 # 可选,默认置为0
 #
 # mysql_connect_flags = 32 # enable compression
 #                            启用压缩

 # MySQL specific SSL certificate settings
 # mysql 具体的 SSL(安全套接层)证书设置
 # optional, defaults are empty
 # 可选项,默认值为空
 #
 # mysql_ssl_cert  = /etc/ssl/client-cert.pem
 # mysql ssl证书路径
 #
 # mysql_ssl_key  = /etc/ssl/client-key.pem
 # 证书key
 # mysql_ssl_ca  = /etc/ssl/cacert.pem
 # 证书ca(何为CA:CA是证书的签发机构,它是PKI的核心。CA是负责签发证书、认证证书、管理已颁发证书的机关。它要制定政策和具体步骤来验证、识别用户身份,并对用户证书进行签名,以确保证书持有者的身份和公钥的拥有权。)

 # MS SQL specific Windows authentication mode flag
 # MSSQL 具体的身份验证模式标识
 # MUST be in sync with charset_type index-level setting
 # 必须与索引级别的charset_type设置保持一致(同步)
 # optional, default is 0
 # 可选的,默认值为0
 #
 # mssql_winauth   = 1 # use currently logged on user credentials
 #         使用当前用户登陆凭据


 # MS SQL specific Unicode indexing flag
 # MS SQL 具体的unicode 索引标识
 # optional, default is 0 (request SBCS data)
 # 可选的,默认值为0 (请求SBCS数据)
 # mssql_unicode   = 1 # request Unicode data from server
 #         向服务器请求unicode数据


 # ODBC specific DSN (data source name)
 # ODBC 具体的DSN(注:何为DSN?DSN为数据源名)
 # mandatory for odbc source type, no default value
 # 以下部份设置DSN,根据不同情况进行不同设置
 # odbc_dsn    = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
 # sql_query    = SELECT id, data FROM documents.csv


 # pre-query, executed before the main fetch query
 # 预查询,在主取数查询之前执行
 # multi-value, optional, default is empty list of queries
 # 多值,可选,默认值为空的查询列表
 # sql_query_pre   = SET NAMES utf8
 # 预查询设置字符集utf8,切记不要加中横线
 # sql_query_pre   = SET SESSION query_cache_type=OFF
 # 设置查询的SESSION query_cache_type为关闭状态


 # main document fetch query
 # mandatory, integer document ID field MUST be the first selected column
 # 主文档取数查询,必选项;整型的文档ID字段必须是查询结果的第一列
 sql_query    = \
  SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
  FROM documents

 # range query setup, query that must return min and max ID values
 # 范围查询设置,该查询必须返回ID的最小值和最大值
 # optional, default is empty
 # 可选项,默认值为空
 #
 # sql_query will need to reference $start and $end boundaries
 # SQL_QUERY 需要引用一个$start 和$end 边界
 # if using ranged query:
 #
 # sql_query    = \
 # SELECT doc.id, doc.id AS group, doc.title, doc.data \
 # FROM documents doc \
 # WHERE id>=$start AND id<=$end
 #
 # sql_query_range  = SELECT MIN(id),MAX(id) FROM documents
 # 范围查询:从documents表中查询ID的最小值和最大值。此处的documents是指样例文件example.sql中创建的数据表


 # range query step
 # 范围查询步长
 # optional, default is 1024
 # 可选项,默认值为1024
 #
 # sql_range_step  = 1000


 # unsigned integer attribute declaration
 # 无符号整型声明
 # multi-value (an arbitrary number of attributes is allowed), optional
 # 多值,准许任意数量的属性
 # optional bit size can be specified, default is 32
 # 可选的位大小是可以被指定的,默认为32
 #
 # sql_attr_uint   = author_id
 # sql_attr_uint   = forum_id:9 # 9 bits for forum_id 存储9位给forum_id这个字段
 sql_attr_uint   = group_id

 # boolean attribute declaration
 # 布尔属性声明
 # multi-value (an arbitrary number of attributes is allowed), optional
 # 多值(准许任意数量的属性),可选
 # equivalent to sql_attr_uint with 1-bit size
 # 相当于位长为1的 sql_attr_uint
 # sql_attr_bool   = is_deleted


 # bigint attribute declaration
 # 大整型属性声明
 # multi-value (an arbitrary number of attributes is allowed), optional
 # 多值(准许任意数量的属性),可选
 # declares a signed (unlike uint!) 64-bit attribute
 # 声明一个有符号(与uint不同!)的64位属性
 # sql_attr_bigint   = my_bigint_id


 # UNIX timestamp attribute declaration
 # unix时间戳声明
 # multi-value (an arbitrary number of attributes is allowed), optional
 # 多值(准许任意数量的属性),可选
 # similar to integer, but can also be used in date functions
 # 类似的整数,但也可以使用日期函数
 # sql_attr_timestamp = posted_ts
 # sql_attr_timestamp = last_edited_ts
 sql_attr_timestamp  = date_added

 # string ordinal attribute declaration
 # 字符串序数(ordinal)属性声明
 # multi-value (an arbitrary number of attributes is allowed), optional
 # 多值(准许任意数量的属性),可选
 # sorts strings (bytewise), and stores their indexes in the sorted list
 # 对字符串进行排序(按字节),并存储它们在排序后列表中的序号
 # sorting by this attr is equivalent to sorting by the original strings
 # 按这个属性排序等效于按原始字符串排序
 # sql_attr_str2ordinal = author_name


 # floating point attribute declaration
 # 浮点属性声明
 # multi-value (an arbitrary number of attributes is allowed), optional
 # 多值(准许任意数量的属性),可选
 # values are stored in single precision, 32-bit IEEE 754 format
 #  值存储于单精度中,32位长度,属于IEEE 754格式
 # sql_attr_float = lat_radians
 # sql_attr_float = long_radians


 # multi-valued attribute (MVA) attribute declaration
 # 多值属性声明
 # multi-value (an arbitrary number of attributes is allowed), optional
 # 多值(准许任意数量的属性),可选
 # MVA values are variable length lists of unsigned 32-bit integers
 # 多值属性的值是由无符号32位整数组成的变长列表
 # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
 # 语法格式是 ATTR-TYPE ATTR-NAME from 源类型 [;QUERY][;RANGE-QUERY]
 # ATTR-TYPE is 'uint' or 'timestamp'
 # 属性类型是uint或者时间戳
 # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
 # 源类型是字段,query或者是ranged-query
 # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
 # 查询是SQL查询使用获取所有(docid,属性值)的对
 # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
 # RANGE-QUERY(区间查询)是用作SQL查询时获取最小ID和最大ID值地,类似于'sql_query_range'
 # sql_attr_multi = uint tag from query; SELECT id, tag FROM tags
 # sql_attr_multi = uint tag from ranged-query; \
 # SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
 # SELECT MIN(id), MAX(id) FROM tags


 # post-query, executed on sql_query completion
 # 后置查询(post-query),在sql_query执行完成后执行
 # optional, default is empty
 # 可选项,默认值为空
 # sql_query_post  =

 
 # post-index-query, executed on successful indexing completion
 #
 # 索引后查询(post-index-query),在索引成功完成后执行
 # optional, default is empty
 # 可选项,默认值为空
 # $maxid expands to max document ID actually fetched from DB
 # $maxid 会被展开为实际从数据库中取到的最大文档ID
 # sql_query_post_index = REPLACE INTO counters ( id, val ) \
 # VALUES ( 'max_indexed_id', $maxid )


 # ranged query throttling, in milliseconds
 # 查询范围限制,以毫秒为单位
 # optional, default is 0 which means no delay
 # 可选,默认为0,这意味着没有延误
 # enforces given delay before each query step
 # 每个查询执行前给予延迟
 sql_ranged_throttle = 0

 # document info query, ONLY for CLI search (ie. testing and debugging)
 # 文档信息查询,只为CLI的搜索
 # optional, default is empty
 # 可选,默认值为空
 # must contain $id macro and must fetch the document by that id
 # 必须包含ID 宏并且必须通过这个ID读取文档
 sql_query_info  = SELECT * FROM documents WHERE id=$id

 # kill-list query, fetches the document IDs for kill-list
 # kill-list 查询,查询这些文档的ID用作kill列表
 # kill-list will suppress matches from preceding indexes in the same query
 # kill-list 会在同一次查询中屏蔽排在前面的索引中的匹配结果
 # optional, default is empty
 # 可选项,默认值为空
 # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex


 # columns to unpack on indexer side when indexing
 # 建立索引时需要在indexer端解压的字段
 # multi-value, optional, default is empty list
 # 多值,可选项,默认值为空列表
 # unpack_zlib = zlib_column
 # unpack_mysqlcompress = compressed_column
 # unpack_mysqlcompress = compressed_column_2


 # maximum unpacked length allowed in MySQL COMPRESS() unpacker
 # 最大解压长度准许mysql COMPRESS() 解压
 # optional, default is 16M
 # 可选项,默认值为16M
 # unpack_mysqlcompress_maxsize = 16M


 #####################################################################
 ## xmlpipe settings
 ## xmlpipe 设置
 #####################################################################

 # type    = xmlpipe
 #
 # 类型=xmlpipe

 # shell command to invoke xmlpipe stream producer
 # 用于启动xmlpipe数据流生成程序的shell命令
 # mandatory
 # 强制
 # xmlpipe_command = cat @CONFDIR@/test.xml

 #####################################################################
 ## xmlpipe2 settings
 ## xmlpipe2 设置
 #####################################################################

 # type    = xmlpipe2
 # xmlpipe_command = cat @CONFDIR@/test2.xml


 # xmlpipe2 field declaration
 # xmlpipe2 字段定义
 # multi-value, optional, default is empty
 # 多值,可选,默认值为空
 #
 # xmlpipe_field    = subject
 # xmlpipe_field    = content


 # xmlpipe2 attribute declaration
 # xmlpipe2 属性定义
 # multi-value, optional, default is empty
 # 多值,可选项,默认值为空
 # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
 # 所有xmlpipe_attr_XXX选项是完全类似的sql_attr_XXX
 # xmlpipe_attr_timestamp = published
 # xmlpipe_attr_uint   = author_id


 # perform UTF-8 validation, and filter out incorrect codes
 # 执行的UTF - 8验证,并过滤掉不正确的代码
 # avoids XML parser choking on non-UTF-8 documents
 # 避免的XML解析器抑制非UTF- 8文件
 # optional, default is 0
 # 可选项,默认值为0
 # xmlpipe_fixup_utf8  = 1
}


# inherited source example
# 继承源样例
# all the parameters are copied from the parent source,
# 所有的参数都是复制来源于父级源,
# and may then be overridden in this source definition
# 并且可能覆盖这个源定义
source src1throttled : src1
{
 # The only override: the parent source src1 sets sql_ranged_throttle = 0
 # (no delay); this source uses a 100 ms delay before each ranged-query step.
 sql_ranged_throttle   = 100
}

#############################################################################
## index definition
## 索引定义
#############################################################################

# local index example
# 本机索引样例
#
# this is an index which is stored locally in the filesystem
# 这是一个索引,它存储于本地文件系统
# all indexing-time options (such as morphology and charsets)
 # 所有索引阶段的选项(如词形处理和字符集)
 # are configured per local index
 # 都是针对每个本地索引单独配置的
index test1
{
 # document source(s) to index
 # 文档源去索引
 # multi-value, mandatory
 # 多值,强制
 # document IDs must be globally unique across all sources
 # 文档的所有ID必须是全局的唯一的跨越所有来源
 source   = src1

 # index files path and file name, without extension
 # 索引文件路径和文件名,无扩展
 # mandatory, path must be writable, extensions will be auto-appended
 # 强制,路径必须是可写的,扩展会自动产生
 path   = @CONFDIR@/data/test1

 # document attribute values (docinfo) storage mode
 # 文档属性值(文档信息)存储模式
 # optional, default is 'extern'
 # 可选,默认值为'extern'
 # known values are 'none', 'extern' and 'inline'
 # 已知的值为'none','extern'和'inline'
 docinfo   = extern

 # memory locking for cached data (.spa and .spi), to prevent swapping
 # 内存锁定为缓存(.spa和.spi),以防止交换
 # optional, default is 0 (do not mlock)
 # 可选项,默认值为0 (不进行内存锁)
 # requires searchd to be run from root
 mlock   = 0

 # a list of morphology preprocessors to apply
 # 要应用的词形处理(morphology)预处理器列表
 # optional, default is empty
 #可选,默认值为空
 # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
 # 内置预处理器有'none','stem_en','stem_ru','stem_enru','soundex'和'metaphone';libstemmer还提供了额外的预处理器'libstemmer_XXX',其中XXX是算法代码
 # 'soundex', and 'metaphone'; additional preprocessors available from
 # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
 # (see libstemmer_c/libstemmer/modules.txt) 查看libstemmer_c/libstemmer/modules.txt
 #
 # morphology  = stem_en, stem_ru, soundex
 # morphology = libstemmer_german
 # morphology = libstemmer_sv
 morphology  = none

 # minimum word length at which to enable stemming
 #  启用词干提取(stemming)的最小词长
 #  optional, default is 1 (stem everything)
 #  可选项,默认值为1(对所有词进行词干提取)
 # min_stemming_len = 1


 # stopword files list (space separated)
 # 非索引字的文件列表(用空格隔开)
 # optional, default is empty
 # 可选项,默认值为空
 # contents are plain text, charset_table and stemming are both applied
 # 内容为纯文本,charset_table和词干提取(stemming)都会应用于其中
 # stopwords   = @CONFDIR@/data/stopwords.txt


 # wordforms file, in "mapfrom > mapto" plain text format
 # wordforms 文件“mapfrom> mapto”,纯文本格式
 # optional, default is empty
 # 可选,默认值为空
 #
 # wordforms   = @CONFDIR@/data/wordforms.txt


 # tokenizing exceptions file
 # tokenizing例外文件
 # optional, default is empty
 # 可选项,默认值为空
 # plain text, case sensitive, space insensitive in map-from part
 # 纯文本,大小写敏感,map-from部分对空格不敏感
 # one "Map Several Words => ToASingleOne" entry per line
 #
 # exceptions  = @CONFDIR@/data/exceptions.txt


 # minimum indexed word length
 # 最低索引字长
 # default is 1 (index everything)
 # 默认为1(所有的索引)
 min_word_len  = 1

 # charset encoding type
 # 字符集编码类型
 #   optional, default is 'sbcs'
 #可选项,默认值为sbcs
 # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
 # 可选的类型为sbcs和utf-8
 charset_type  = utf-8

 # charset definition and case folding rules "table"
 # 字符集定义和大小写转换(case folding)规则"表"
 # optional, default value depends on charset_type
 # 可选项,默认值取决于charset_type
 # defaults are configured to include English and Russian characters only
 # 默认配置只包括英文和俄文字符
 # you need to change the table to include additional ones
 # 您需要更改的表包含附加字符集
 # this behavior MAY change in future versions
 # 这种行为可能会更改在将来的版本中
 #
 # 'sbcs' default value is
 # sbcs默认值是
 # charset_table  = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
 # utf8的默认值为
 # 'utf-8' default value is
 charset_table  = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F


 # ignored characters list
 # 忽略字符列表
 # optional, default value is empty
 # 可选项,默认值为空
 # ignore_chars  = U+00AD


 # minimum word prefix length to index
 # 最小单词前缀长度索引
 # optional, default is 0 (do not index prefixes)
 # 可选项,默认值为0(不包含索引前缀)
 # min_prefix_len = 0


 # minimum word infix length to index
 # 索引的最小中缀长度
 # optional, default is 0 (do not index infixes)
 # 可选项,默认值为0(不索引中缀)
 # min_infix_len  = 0


 # list of fields to limit prefix/infix indexing to
 # 限定进行前缀/中缀索引的字段列表
 # optional, default value is empty (index all fields in prefix/infix mode)
 # 可选项,默认值为空(索引的所有字段,前缀/中缀模式)
 # prefix_fields  = filename
 # infix_fields  = url, domain


 # enable star-syntax (wildcards) when searching prefix/infix indexes
 # 启动star-syntax(通配符)当搜索时前缀、中缀索引
 # known values are 0 and 1
 # 可选值为0和1
 # optional, default is 0 (do not use wildcard syntax)
 # 可选项,默认值为0(不要使用通配符语法)
 # enable_star  = 1


 # n-gram length to index, for CJK indexing
 # n-gram 长度去索引,为了CJK编码索引
 # only supports 0 and 1 for now, other lengths to be implemented
 # 目前只支持0和1,其他长度尚待实现
 # optional, default is 0 (disable n-grams)
 # 可选项,默认值为0 关闭n-grams
 # ngram_len    = 1


 # n-gram characters list, for CJK indexing
 # n-gram字符集列表,为CJK字符集索引
 # optional, default is empty
 # 可选项,默认值为空
 # ngram_chars   = U+3000..U+2FA1F


 # phrase boundary characters list
 # 短语边界字符列表
 # optional, default is empty
 #  可选项,默认值为空
 # phrase_boundary  = ., ?, !, U+2026 # horizontal ellipsis


 # phrase boundary word position increment
 # 短语边界位置增量字
 #  optional, default is 0
 #   可选项,默认值为0
 # phrase_boundary_step = 100


 # whether to strip HTML tags from incoming documents
 # 是否过滤HTML标签
 # known values are 0 (do not strip) and 1 (do strip)
 # 已知的值是0(不过虑)和1(过滤)
 # optional, default is 0
 # 可选项,默认值为0
 html_strip    = 0

 # what HTML attributes to index if stripping HTML
 # 过滤HTML时,哪些HTML属性的内容仍需要被索引
 # optional, default is empty (do not index anything)
 # 可选项,默认值为空(不去索引任何项)
 # html_index_attrs  = img=alt,title; a=title;


 # what HTML elements contents to strip
 # 哪些HTML元素索引的时候需要被过滤
 # optional, default is empty (do not strip element contents)
 #可选项,默认为空(不过虑这些元素内容)
 # html_remove_elements = style, script


 # whether to preopen index data files on startup
 # 在启动时是否预开索引数据文件
 # optional, default is 0 (do not preopen), searchd-only
 # 可选项,默认值为0(不预开)
 # preopen     = 1


 # whether to keep dictionary (.spi) on disk, or cache it in RAM
 # 是否保留字典(.SPI)在磁盘上,或者是缓存在内存上
 # optional, default is 0 (cache in RAM), searchd-only
 # 可选项,默认值为0(缓存于内存)
 # ondisk_dict    = 1


 # whether to enable in-place inversion (2x less disk, 90-95% speed)
 # 是否启用就地转化(2倍更少的磁盘,90-95%的速度)
 # optional, default is 0 (use separate temporary files), indexer-only
 # 可选项,默认值为0(使用单独的临时文件)
 # inplace_enable   = 1


 # in-place fine-tuning options
 # 就地微调选项
 # optional, defaults are listed below
 # 可选项,下面列出了默认项
 # inplace_hit_gap   = 0  # preallocated hitlist gap size  预分配的hitlist间隙大小
 # inplace_docinfo_gap  = 0  # preallocated docinfo gap size 预分配的docinfo间隙大小
 # inplace_reloc_factor = 0.1 # relocation buffer size within arena 内存池(arena)中重定位缓冲区的大小
 # inplace_write_factor = 0.1 # write buffer size within arena 内存池(arena)中写缓冲区的大小


 # whether to index original keywords along with stemmed versions
 # 是否在索引词干化形式的同时也索引原始关键词
 # enables "=exactform" operator to work  使"=exactform"操作符可用
 #
 # optional, default is 0
 # 可选项,默认值为0
 # index_exact_words  = 1


 # position increment on overshort (less than min_word_len) words
 # 过短的词(长度小于min_word_len)的位置增量
 # optional, allowed values are 0 and 1, default is 1
 # 可选项,准许值有0和1,默认值为1
 # overshort_step   = 1


 # position increment on stopword
 # 停用词(stopword)的位置增量
 # optional, allowed values are 0 and 1, default is 1
 # 可选项,准许值有0和1,默认值为1
 # stopword_step   = 1
}


# inherited index example
# 继承索引样例
# all the parameters are copied from the parent index,
# 所有的参数都是从父复制索引,
# and may then be overridden in this index definition
#  然后,可以在该索引覆盖的定义
index test1stemmed : test1
{
 # Own index file path (the parent index test1 uses @CONFDIR@/data/test1).
 path   = @CONFDIR@/data/test1stemmed
 # Use the builtin 'stem_en' morphology preprocessor instead of the
 # parent's 'none'.
 morphology  = stem_en
}


# distributed index example
# 分布式索引的例子
# this is a virtual index which can NOT be directly indexed,
# 这是一个虚拟索引,不能直接对其建立索引,
# and only contains references to other local and/or remote indexes
# 并且只包含其他本地及/或远程索引引用
index dist1
{
 # 'distributed' index type MUST be specified
 # 必须指定索引类型为'distributed'(分布式)
 type    = distributed

 # local index to be searched
 # 本地索引进行搜索
 # there can be many local indexes configured
 #  可以有很多配置本地索引
 local    = test1
 local    = test1stemmed

 # remote agent
 # 远程代理
 # multiple remote agents may be specified
 # 多个远程代理可以指定
 # syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'
 # TCP连接的语法'hostname:port:index1,[index2[,...]]'
 # syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'
 # 语法本地UNIX连接 '/path/to/socket:index1,[index2[,...]]'
 agent    = localhost:9313:remote1
 agent    = localhost:9314:remote2,remote3
 # agent    = /var/run/searchd.sock:remote4
        #代理
 # blackhole remote agent, for debugging/testing
 # 黑洞远程代理,调试/测试
 # network errors and search results will be ignored
 # 网络错误和搜索结果将被忽略
 # agent_blackhole  = testbox:9312:testindex1,testindex2
 


 # remote agent connection timeout, milliseconds
 # 远程代理连接超时,毫秒
 # optional, default is 1000 ms, ie. 1 sec
 # 可选项,默认值为1000ms,1秒
 agent_connect_timeout = 1000

 # remote agent query timeout, milliseconds
 #  远程代理查询超时,毫秒
 # optional, default is 3000 ms, ie. 3 sec
 # 可选项,默认值为3000毫秒 相当于3秒
 agent_query_timeout  = 3000
}

#############################################################################
## indexer settings
## 索引器设置
#############################################################################

indexer
{
 # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
 # 内存限制,单位可以是字节,千字节(如16384K)或兆字节(如256M)
 # optional, default is 32M, max is 2047M, recommended is 256M to 1024M
 # 可选项,默认值为32M,最大值为2047M,推荐的是256M到1024M
 mem_limit   = 32M

 # maximum IO calls per second (for I/O throttling)
 # 最高每秒的IO调用
 # optional, default is 0 (unlimited)
 # 可选项,默认值为0 不限制
 # max_iops   = 40


 # maximum IO call size, bytes (for I/O throttling)
 # 最大IO调用大小,字节(用于I / O限制)
 # optional, default is 0 (unlimited)
 # 可选项,默认值为0不限制
 # max_iosize  = 1048576


 # maximum xmlpipe2 field length, bytes
 # 最大xmlpipe2字段长度,字节
 # optional, default is 2M
 # 可选项,默认值为2M
 # max_xmlpipe2_field = 4M


 # write buffer size, bytes
 # 写入缓冲区大小,字节
 #  several (currently up to 4) buffers will be allocated
 # 几个(目前最多4个)的缓冲区将被分配
 # write buffers are allocated in addition to mem_limit
 # 写缓冲区是在mem_limit之外另行分配的
 # optional, default is 1M
 # 可选项,默认值为1M
 # write_buffer  = 1M
}

#############################################################################
## searchd settings
## searchd 设置
#
#############################################################################

searchd
{
 # hostname, port, or hostname:port, or /unix/socket/path to listen on
 # 要监听的主机名,端口,主机名:端口,或者/unix/socket/path;多值,允许配置多个监听点
 # multi-value, multiple listen points are allowed
 # optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312)
 # 可选项,默认值为0.0.0.0:9312(监听所有的接口,端口为9312)
 # listen    = 127.0.0.1
 # listen    = 192.168.0.1:9312
 # listen    = 9312
 # listen    = /var/run/searchd.sock


 # log file, searchd run info is logged here
 # 日志文件,searchd的运行信息记录在这里
 # optional, default is 'searchd.log'
 log     = @CONFDIR@/log/searchd.log

 # query log file, all search queries are logged here
 # 查询日志文件,所有的搜索记录全部在记录在这儿
 # optional, default is empty (do not log queries)
 # 可选项,默认值为空(不记录查询日志)
 query_log   = @CONFDIR@/log/query.log

 # client read timeout, seconds
 # 客户端读取超时时间,秒为单位
 # optional, default is 5
 # 可选项,默认值为5
 read_timeout  = 5

 # request timeout, seconds
 # 请求超时时间,单位为秒
 # optional, default is 5 minutes
 # 可选项,默认值为5分钟
 client_timeout  = 300

 # maximum amount of children to fork (concurrent searches to run)
 #
 # optional, default is 0 (unlimited)
 # 可选项,默认值为0(无限)
 max_children  = 30

 # PID file, searchd process ID file name
 # 进程文件,搜索的处理ID文件名
 # mandatory
 # 必选
 pid_file   = @CONFDIR@/log/searchd.pid

 # max amount of matches the daemon ever keeps in RAM, per-index
 # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
 # default is 1000 (just like Google)
 max_matches   = 1000

 # seamless rotate, prevents rotate stalls if precaching huge datasets
 # optional, default is 1
 # 可选项,默认值为1
 seamless_rotate  = 1

 # whether to forcibly preopen all indexes on startup
 # 是否在启动时强行预开所有的索引
 # optional, default is 0 (do not preopen)
 # 可选项,默认值为0(不进行预开)
 preopen_indexes  = 0

 # whether to unlink .old index copies on successful rotation.
 # 索引轮换(rotation)成功后,是否删除(unlink).old旧索引副本。
 # optional, default is 1 (do unlink)
 # 可选项,默认值为1(删除)
  unlink_old   = 1

 # attribute updates periodic flush timeout, seconds
 # updates will be automatically dumped to disk this frequently
 # optional, default is 0 (disable periodic flush)
 #
 # attr_flush_period = 900


 # instance-wide ondisk_dict defaults (per-index value take precedence)
 # optional, default is 0 (precache all dictionaries in RAM)
 #
 # ondisk_dict_default = 1


 # MVA updates pool size
 # MVA 更新池的大小
 # shared between all instances of searchd, disables attr flushes!
 # 在searchd的所有实例之间共享,并且会禁用属性刷新(flush)!
 # optional, default size is 1M
 # 可选项,默认值为1M
 mva_updates_pool = 1M

 # max allowed network packet size
 # 最大准许的网络包大小
 # limits both query packets from clients, and responses from agents
 #  限制查询包大小来自客户端和响应代理
 # optional, default size is 8M
 # 可选项,默认值为8M
 max_packet_size  = 8M

 # crash log path
 # 崩溃日志路径
 # searchd will (try to) log crashed query to 'crash_log_path.PID' file
 # searchd 将尝试把导致崩溃的查询记录到'crash_log_path.PID'文件
 # optional, default is empty (do not create crash logs)
 # 可选项,默认值为空。(不记录崩溃日志)
 #   crash_log_path  = @CONFDIR@/log/crash


 # max allowed per-query filter count
 # 最大允许每个查询过滤器计数
 # optional, default is 256
 # 可选项,默认值为256
 max_filters   = 256

 # max allowed per-filter values count
 # 最大允许每个过滤值的计数
 # optional, default is 4096
 # 可选项,默认值为4096
 max_filter_values = 4096


 # socket listen queue length
 # socket监听队列的长度
 # optional, default is 5
 # 可选项,默认值为5
 # listen_backlog  = 5


 # per-keyword read buffer size
 # 每个关键字读取缓冲区的大小
 # optional, default is 256K
 # 可选项,默认值为256K
 # read_buffer   = 256K


 # unhinted read size (currently used when reading hits)
 # 无提示(unhinted)读取大小(目前用于读取命中列表hits)
 # optional, default is 32K
 # 可选项,默认值为32K
 # read_unhinted  = 32K
}

# --eof--
# 结束

  以下是其简单的一个应用:

<?php
  // Minimal SphinxClient example: run one query against searchd and print
  // a summary line followed by per-keyword statistics.
  require ( "../sys/lib/sphinxapi.php" );

  // Query text and connection/search parameters.
  $q = '"东方新闻"';
  $host = '192.168.1.100';
  $port = 9312;
  $matchMode = SPH_MATCH_EXTENDED;
  $indexes = '*';

  // Configure the client before issuing the query.
  $client = new SphinxClient ();
  $client->SetServer ( $host, $port );
  $client->SetConnectTimeout ( 1 );
  $client->SetArrayResult ( true );
  $client->SetMatchMode ( $matchMode );

  $res = $client->Query ( $q, $indexes );

  if ( $res!==false )
  {
    // The query succeeded; a warning may still have been recorded.
    if ( $client->GetLastWarning() )
    {
      print "WARNING: " . $client->GetLastWarning() . "\n\n";
    }

    print "Query '$q' retrieved $res[total] of $res[total_found] matches in $res[time] sec.\n";
    print "Query stats:\n";

    // Per-keyword hit/document counts, when the result carries them.
    if ( isset($res['words']) && is_array($res["words"]) )
    {
      foreach ( $res["words"] as $word => $info )
      {
        print "    '$word' found $info[hits] times in $info[docs] documents\n";
      }
    }

    print "\n";
  }
  else
  {
    // Query() returned false: report the client's last error.
    print "Query failed: " . $client->GetLastError() . ".\n";
  }
?>

 

参考阅读:

Sphinx配置文件详细介绍

http://lovealwaysonline.blog.163.com/blog/static/197692011201261485729837/

数据源配置:mysql数据源

http://www.coreseek.cn/products-install/mysql/

BSD/Linux下的安装测试

http://www.coreseek.cn/products-install/install_on_bsd_linux/

 

posted @ 2013-07-23 16:59  骑猪南下  阅读(418)  评论(0)  编辑  收藏  举报