HBase保存的各个字段意义解释
/×××××××××××××××××××××××××××××××××××××××××/
Author:xxx0624
HomePage:http://www.cnblogs.com/xxx0624/
/×××××××××××××××××××××××××××××××××××××××××/
nutch2.2.1 集成 HBase0.94.25。原始文件可以在 nutch 的 conf 目录下的 gora-hbase-mapping.xml 中查看。
<gora-orm>
<!--
  Column families of the "webpage" table. Every family keeps a single version
  (maxVersions="1"). A family can also have params like compression, bloom filters.
-->
<table name="webpage">
  <!-- p: used by the parse fields of the WebPage class -->
  <family name="p" maxVersions="1"/>
  <!-- f: fetch (download) fields -->
  <family name="f" maxVersions="1"/>
  <!-- s: score -->
  <family name="s" maxVersions="1"/>
  <!-- il: inbound link addresses -->
  <family name="il" maxVersions="1"/>
  <!-- ol: outbound link addresses -->
  <family name="ol" maxVersions="1"/>
  <!-- h: used by the "headers" field -->
  <family name="h" maxVersions="1"/>
  <!-- mtdt: used by the "metadata" field -->
  <family name="mtdt" maxVersions="1"/>
  <!-- mk: used by the "markers" field -->
  <family name="mk" maxVersions="1"/>
</table>
<!-- name 表示各个 table 的列族 (family)。
     比如: f 表示下载, s 表示评分, il 表示链入地址, ol 表示链出地址, 等等。 -->
<!--
  Maps org.apache.nutch.storage.WebPage (keyed by java.lang.String) onto the
  "webpage" table. Each <field> binds one WebPage property to a column family
  and, where given, a column qualifier.
  Per-field notes are written as XML comments so that no stray character data
  ends up inside <class>.
-->
<class table="webpage" keyClass="java.lang.String" name="org.apache.nutch.storage.WebPage">
  <!-- fetch fields -->
  <field name="baseUrl" family="f" qualifier="bas"/>            <!-- source address -->
  <field name="status" family="f" qualifier="st"/>
  <field name="prevFetchTime" family="f" qualifier="pts"/>
  <field name="fetchTime" family="f" qualifier="ts"/>           <!-- download (fetch) time -->
  <field name="fetchInterval" family="f" qualifier="fi"/>
  <field name="retriesSinceFetch" family="f" qualifier="rsf"/>
  <field name="reprUrl" family="f" qualifier="rpr"/>
  <field name="content" family="f" qualifier="cnt"/>            <!-- downloaded content -->
  <field name="contentType" family="f" qualifier="typ"/>        <!-- type of the downloaded content -->
  <field name="protocolStatus" family="f" qualifier="prot"/>
  <field name="modifiedTime" family="f" qualifier="mod"/>
  <field name="prevModifiedTime" family="f" qualifier="pmod"/>
  <field name="batchId" family="f" qualifier="bid"/>

  <!-- parse fields -->
  <field name="title" family="p" qualifier="t"/>                <!-- content title -->
  <field name="text" family="p" qualifier="c"/>
  <field name="parseStatus" family="p" qualifier="st"/>
  <field name="signature" family="p" qualifier="sig"/>
  <field name="prevSignature" family="p" qualifier="psig"/>

  <!-- score fields -->
  <field name="score" family="s" qualifier="s"/>

  <!-- remaining fields: declared with a column family only, no qualifier -->
  <field name="headers" family="h"/>
  <field name="inlinks" family="il"/>                           <!-- inbound link addresses -->
  <field name="outlinks" family="ol"/>                          <!-- outbound link addresses -->
  <field name="metadata" family="mtdt"/>
  <field name="markers" family="mk"/>
</class>
<!-- Column families of the "host" table; each keeps a single version (maxVersions="1"). -->
<table name="host">
  <!-- mtdt: used by the "metadata" field of the Host class -->
  <family name="mtdt" maxVersions="1"/>
  <!-- il: used by the "inlinks" field -->
  <family name="il" maxVersions="1"/>
  <!-- ol: used by the "outlinks" field -->
  <family name="ol" maxVersions="1"/>
</table>
<!--
  Maps org.apache.nutch.storage.Host (keyed by java.lang.String) onto the "host"
  table. All three fields are declared with a column family only, no qualifier.
-->
<class table="host" keyClass="java.lang.String" name="org.apache.nutch.storage.Host">
  <field name="metadata" family="mtdt"/>
  <field name="inlinks" family="il"/>
  <field name="outlinks" family="ol"/>
</class>
</gora-orm>
keep moving...

浙公网安备 33010602011771号